In [3]:
import numpy as np
import pandas as pd

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same table
# every time. A local Generator is used, so numpy's global random state is
# left untouched.
SEED = 42

# Create a 4x4 numpy array populated with 16 random integers between 0 and 100.
# `high` is exclusive, hence 101.
dataset = np.random.default_rng(SEED).integers(low=0, high=101, size=(4, 4))

# Define the column names for the created 4x4 numpy array
column_names = ["Eleanor", "Chidi", "Tahani", "Jason"]

# Create a Pandas DataFrame using the 4x4 numpy array and the column names
dataframe = pd.DataFrame(dataset, columns=column_names)

print("The entire DataFrame:")
print(dataframe)

# head(3) returns the first three rows.
print("Rows #0, #1, and #2:")
print(dataframe.head(3), "\n")

# Passing a list ([2]) to iloc keeps the result a one-row DataFrame
# instead of collapsing it to a Series.
print("Row #2:")
print(dataframe.iloc[[2]], "\n")

# Explicit positional slice; the end position (4) is exclusive.
print("Rows #1, #2, and #3:")
print(dataframe.iloc[1:4], "\n")

# Selecting a single column by name yields a Series.
print("Columns 'Jason':")
print(dataframe["Jason"], "\n")

The entire DataFrame:
   Eleanor  Chidi  Tahani  Jason
0       36     30      99     45
1       74     58       5      4
2       35      1      93     74
3       62     74      41     18
Rows #0, #1, and #2:
   Eleanor  Chidi  Tahani  Jason
0       36     30      99     45
1       74     58       5      4
2       35      1      93     74 

Row #2:
   Eleanor  Chidi  Tahani  Jason
2       35      1      93     74 

Rows #1, #2, and #3:
   Eleanor  Chidi  Tahani  Jason
1       74     58       5      4
2       35      1      93     74
3       62     74      41     18 

Columns 'Jason':
0    45
1     4
2    74
3    18
Name: Jason, dtype: int64 



In [6]:
# Create a fifth column named Janet, which is populated with the row-by-row sums of Tahani and Jason
dataframe["Janet"] = dataframe["Tahani"] + dataframe["Jason"]

# NOTE: if this cell is re-run in the same kernel, the "Sum" column assigned
# further down already exists, so it also shows up in this first printout.
print("The New DataFrame:")
print(dataframe)

# Create a sixth column named Sum, which is populated with the row-by-row sums
# of the five columns listed below. The columns are named explicitly rather
# than summing the whole frame, so an existing "Sum" column from a previous run
# is never added into the new total.
summed_columns = ["Eleanor", "Chidi", "Tahani", "Jason", "Janet"]
row_by_row_sum = dataframe[summed_columns].sum(axis=1)  # axis=1 -> one total per row
dataframe["Sum"] = row_by_row_sum

print("The New DataFrame:")
print(dataframe)

The New DataFrame:
   Eleanor  Chidi  Tahani  Jason  Janet  Sum
0       36     30      99     45    144  354
1       74     58       5      9     14  160
2       35      1      93     74    167  370
3       62     74      41     18     59  254
The New DataFrame:
   Eleanor  Chidi  Tahani  Jason  Janet  Sum
0       36     30      99     45    144  354
1       74     58       5      9     14  160
2       35      1      93     74    167  370
3       62     74      41     18     59  254


In [7]:
# There are two different ways to "duplicate" a DataFrame:
# 1. Referencing. If you assign a DataFrame to a new variable, both names refer
#    to the same object, so any change made through one is visible through the other.
# 2. Copying. If you call the DataFrame.copy method, you create an independent
#    copy. Changes to the original DataFrame or to the copy are not reflected in the other.
# The difference between the two ways is subtle, but important.
#
# NOTE: this cell modifies `dataframe` in place, so every re-run in the same
# kernel adds another 10 to 'Jason' and another 100 to 'Sum' in row 1.

# Create a reference by assigning dataframe to a new variable.
print("Experiment with a reference:")
reference_to_dataframe = dataframe

# Print the starting value of a particular cell.
# .at[row_label, column_label] is used for both reads and writes so that every
# scalar access in this cell is explicitly label-based.
print("  Starting value of dataframe: %d" % dataframe.at[1, 'Jason'])
print("  Starting value of reference_to_dataframe: %d\n" % reference_to_dataframe.at[1, 'Jason'])

# Modify a cell in dataframe.
dataframe.at[1, 'Jason'] = dataframe.at[1, 'Jason'] + 10
print("  Updated dataframe: %d" % dataframe.at[1, 'Jason'])
print("  Updated reference_to_dataframe: %d\n\n" % reference_to_dataframe.at[1, 'Jason'])

# Create a true copy of dataframe
print("Experiment with a true copy:")
copy_of_dataframe = dataframe.copy()

# Print the starting value of a particular cell.
print("  Starting value of dataframe: %d" % dataframe.at[1, 'Sum'])
print("  Starting value of copy_of_dataframe: %d\n" % copy_of_dataframe.at[1, 'Sum'])

# Modify a cell in dataframe; the copy keeps its own data and is unaffected.
dataframe.at[1, 'Sum'] = dataframe.at[1, 'Sum'] + 100
print("  Updated dataframe: %d" % dataframe.at[1, 'Sum'])
print("  copy_of_dataframe does not get updated: %d" % copy_of_dataframe.at[1, 'Sum'])

Experiment with a reference:
  Starting value of dataframe: 9
  Starting value of reference_to_dataframe: 9

  Updated dataframe: 19
  Updated reference_to_dataframe: 19


Experiment with a true copy:
  Starting value of dataframe: 160
  Starting value of copy_of_dataframe: 160

  Updated dataframe: 260
  copy_of_dataframe does not get updated: 160
