# Intro to Pandas Library

The pandas library is built mostly around two data types:
- Series: A 1D labeled array that represents a single column of data
- DataFrames: A 2D object to represent `tabular` data

## DataFrame Characteristics
- Mutable
- Index based
- A full class (with its own attributes and methods)

## Classic data representation formats

- JSON
- CSV

In [14]:
import pandas as pd

In [11]:
# Start from an empty DataFrame: the column labels are declared up front, no rows yet
df_one = pd.DataFrame(data=None, columns=['Column1', 'Column2'])
df_one

Unnamed: 0,Column1,Column2


In [12]:
# Another way to set the column labels: assign a new list of names to `.columns`,
# which relabels the columns of the existing DataFrame
df_one.columns = ['First', 'Second']
df_one

Unnamed: 0,First,Second


In [13]:
# Sample rows, organised per column: each key is a column label and
# its list holds that column's values from top to bottom
data1 = dict(
    First=[1, 2, 'Real'],
    Second=[22, 44, 'Madrid'],
)

# Build a populated DataFrame straight from the dictionary
df_two = pd.DataFrame(data=data1)
df_two

Unnamed: 0,First,Second
0,1,22
1,2,44
2,Real,Madrid


In [15]:
# Combine two dfs: pd.concat stacks the listed frames on top of each other (row-wise).
# df_one was created without rows, so the displayed result contains only df_two's rows.
df_three = pd.concat([df_one, df_two])
df_three

Unnamed: 0,First,Second
0,1,22
1,2,44
2,Real,Madrid


In [16]:
# Give df_one real content: one list of values per column label
data = dict(
    First=[33, 44, 'Hello'],
    Second=[55, 66, 'CS4580'],
)
df_one = pd.DataFrame(data=data)

# Stack df_one on top of df_two (axis=0 means row-wise); each frame keeps
# its own index labels, so 0, 1, 2 appear twice in the result
df_three = pd.concat([df_one, df_two], axis=0)
df_three

Unnamed: 0,First,Second
0,33,55
1,44,66
2,Hello,CS4580
0,1,22
1,2,44
2,Real,Madrid


In [17]:
# Renumber the index after combining: the concatenated frame still carries
# each source frame's own labels (0, 1, 2, 0, 1, 2).
# drop=True discards the old labels instead of keeping them as a new column;
# inplace=True modifies df_three directly rather than returning a new frame.
df_three.reset_index(drop = True, inplace = True)
df_three

Unnamed: 0,First,Second
0,33,55
1,44,66
2,Hello,CS4580
3,1,22
4,2,44
5,Real,Madrid


In [19]:
# Add df column-wise: first build a one-column frame with three values
col_data = {
    'Third': [88, 99, 11]
}
df_four = pd.DataFrame(col_data)

# Now combine them by columns, add the axis = 1 parameter.
# Rows are matched on their index label; df_four only has rows 0-2, so the
# remaining rows of df_three get no value in 'Third' (blank in the output below).
df_combined = pd.concat([df_three, df_four], axis = 1)
df_combined

Unnamed: 0,First,Second,Third
0,33,55,88.0
1,44,66,99.0
2,Hello,CS4580,11.0
3,1,22,
4,2,44,
5,Real,Madrid,


## Task: Sales Data

In [29]:
# TODO: Define a DF called df_sales with two columns: Date, Amount

# TODO: Add sample data: three rows.

# TODO: Create a new DF with two more rows of data, same columns as df_sales.

# TODO: Combine them, and make sure the indexes are correct.

# TODO: Add a new column called: 'Product' with 4 rows of data, and combine it into original df_sales

In [23]:
# Task step 1: an empty df_sales that only declares the Date and Amount columns
df_sales = pd.DataFrame(data=None, columns=['Date', 'Amount'])
df_sales

Unnamed: 0,Date,Amount


In [26]:
# Task step 2: three sample sales rows, written column by column
sales_data = dict(
    Date=['5th May', '6th May', '7th May'],
    Amount=[90, 180, 270],
)
df_sales = pd.DataFrame(data=sales_data)
df_sales

Unnamed: 0,Date,Amount
0,5th May,90
1,6th May,180
2,7th May,270


In [28]:
# Task step 3: a second sales frame with the same columns as df_sales.
# NOTE(review): this reuses sales_data, so it repeats the same three rows
# rather than adding two new ones as the task asks.
df_sales2 = pd.DataFrame(data=sales_data)

# Task step 4: stack both frames; ignore_index=True renumbers the rows
# 0..n-1 during the concat, so no separate reset_index call is needed
df_sales3 = pd.concat([df_sales, df_sales2], ignore_index=True)
df_sales3

Unnamed: 0,Date,Amount
0,5th May,90
1,6th May,180
2,7th May,270
3,5th May,90
4,6th May,180
5,7th May,270


In [32]:
# TODO: Add a new column called: 'Product' with 4 rows of data, and combine it into original df_sales
# One-column frame holding the four product names
sales_col_data = {
    'Product': ['Ball', 'Cartoon', 'Milk', 'Goblin']
}
# Note: this rebinds df_four, the name used earlier for the 'Third' column frame
df_four = pd.DataFrame(sales_col_data)

# Column-wise combine (axis = 1) with the six-row df_sales3; the result replaces df_sales.
# Only four products are supplied, so rows 4 and 5 have no Product value (see the output below).
df_sales = pd.concat([df_sales3, df_four], axis = 1)
df_sales

Unnamed: 0,Date,Amount,Product
0,5th May,90,Ball
1,6th May,180,Cartoon
2,7th May,270,Milk
3,5th May,90,Goblin
4,6th May,180,
5,7th May,270,


# Working with JSON Files

You can handle JSON files directly with pandas, using the `read_json()` function.

In [1]:
import pandas as pd

# Load the JSON file into a DataFrame.
# The path uses forward slashes: backslashes in a normal string literal are
# treated as escape characters, and forward slashes work on Windows, macOS and Linux.
df_json_data = pd.read_json('../data/pandas01Data/example-1.json')

In [2]:
# Display the df: a bare expression on the last line of a cell is shown as the cell's output
df_json_data

Unnamed: 0,Column 1,Column 2
0,1,2
1,3,4
2,5,6


In [3]:
# Convert the DataFrame to a JSON-formatted string.
# Called without arguments, the result is nested as column -> {index label -> value},
# as the output below shows.
json_format = df_json_data.to_json()
json_format

'{"Column 1":{"0":1,"1":3,"2":5},"Column 2":{"0":2,"1":4,"2":6}}'

# Working with CSV Files

With pandas, use the `read_csv()` function.

In [4]:
# Load a CSV file; its first line is used for the column names shown in the output.
# The path uses forward slashes: backslashes in a normal string literal are
# treated as escape characters, and forward slashes work on Windows, macOS and Linux.
df_csv_format = pd.read_csv('../data/pandas01Data/example-1.csv')
# Display the df
df_csv_format

Unnamed: 0,Branch,Date,Amount
0,Branch A,January 1,500.0
1,Branch B,January 2,250.0
2,Branch A,January 3,300.0


In [7]:
# Load a CSV file that has no header row: header=None keeps the first line as
# data and labels the columns 0, 1, 2 instead.
# The path uses forward slashes: backslashes in a normal string literal are
# treated as escape characters, and forward slashes work on Windows, macOS and Linux.
df_csv_format = pd.read_csv('../data/pandas01Data/example-2.csv', header=None)
# Display the df
df_csv_format

Unnamed: 0,0,1,2
0,Branch A,January 1,500.0
1,Branch B,January 2,250.0
2,Branch A,January 3,300.0


In [8]:
# Save the data back to a CSV file (the file name now carries the .csv extension).
# If you do not need the index information, pass index=False so the row
# labels are not written as an extra first column.
df_csv_format.to_csv('test.csv', index=False)