# Intro to Pandas Library

The pandas library consist mostly of two data types
- Series
- DataFrames: A 2D object to represent `tabular` data

## DataFrames Characteristics
- Mutable
- Index based
- A full class

## Classic data representation formats
- JSON
- CSV



In [2]:
import pandas as pd

In [3]:
# Create a new dataFrame
df_one = pd.DataFrame(columns=['Column1', 'Column2'])
df_one

Unnamed: 0,Column1,Column2


In [4]:
# Another way
df_one.columns = ['First', 'Second']
df_one

Unnamed: 0,First,Second


In [5]:
# Add some rows
data1 = {
    'First': [1, 2, 'Real'],     # key = column, values = list of data
    'Second': [22, 44, 'Madrid']
}

# Create new df with data
df_two = pd.DataFrame(data1)
df_two

Unnamed: 0,First,Second
0,1,22
1,2,44
2,Real,Madrid


In [6]:
# Combine two dfs
df_three = pd.concat([df_one, df_two])
df_three

Unnamed: 0,First,Second
0,1,22
1,2,44
2,Real,Madrid


In [7]:
# Add data to df_one
data = {
    'First': [66, 77, 'Hello'],
    'Second': [88, 99, 'CS4580']
}

# Add it to frame
df_one = pd.DataFrame(data)

#Combine them
df_three = pd.concat([df_one, df_two])
df_three

Unnamed: 0,First,Second
0,66,88
1,77,99
2,Hello,CS4580
0,1,22
1,2,44
2,Real,Madrid


In [8]:
# Update indexes as you combine them
df_three.reset_index(drop=True, inplace=True)
df_three

Unnamed: 0,First,Second
0,66,88
1,77,99
2,Hello,CS4580
3,1,22
4,2,44
5,Real,Madrid


In [11]:
# Add df column-wise
col_data = {
    'Third': [88, 99, 11]
}
df_four = pd.DataFrame(col_data)
# Now combine them by column, add the axis =1 parameter
df_combined = pd.concat([df_three, df_four], axis=1)
df_combined

Unnamed: 0,First,Second,Third
0,66,88,88.0
1,77,99,99.0
2,Hello,CS4580,11.0
3,1,22,
4,2,44,
5,Real,Madrid,


## Task: Sales Data

In [18]:
# TODO: Define a df called df_sales with two columns: Date, Amount
import pandas as pd
df_sales = pd.DataFrame(columns=['Date', 'Amount'])

# TODO: Add sample data: 3 rows
data = {
    'Date': ['9/11/2024','9/10/2024', '9/9/2024'],
    'Amount': [150, 250, 125]
}
df_sales = pd.DataFrame(data) # Add it to frame

# TODO: Create a new df with two more rows of data, same columns as df_sales
df_sales_new = pd.DataFrame({
    'FirstName': ['Batu','Ece', 'George'],
    'LastName': ['Sens','Tanli', 'Curious']
})
df_sales_new

# TODO: Combine them and make sure indexes are correct
df_sales = pd.concat([df_sales, df_sales_new], ignore_index=True)
df_sales

# TODO: Add a new column called: 'Product' with 4 rows of data, and combine it to original df_sales
new_column = pd.DataFrame({'Product':['A', 'B', 'C']})
df_sales['Product'] = new_column
df_sales


Unnamed: 0,Date,Amount,FirstName,LastName,Product
0,9/11/2024,150.0,,,A
1,9/10/2024,250.0,,,B
2,9/9/2024,125.0,,,C
3,,,Batu,Sens,
4,,,Ece,Tanli,
5,,,George,Curious,
