# Pandas

Pandas is a powerful data manipulation library for Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional, array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [2]:
import numpy as np
import pandas as pd

In [4]:
# Series
# A Series built from a plain list gets a default integer index (0, 1, 2, ...).

data = list(range(1, 6))
series = pd.Series(data=data)
print(series)
type(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


pandas.core.series.Series

In [5]:
# Series built from a dictionary: the keys become the index labels.
data_dict = dict(a=1, b=2, c=3, d=4, e=5)
series_dict = pd.Series(data=data_dict)
print(series_dict)
type(series_dict)

a    1
b    2
c    3
d    4
e    5
dtype: int64


pandas.core.series.Series

In [7]:
# DataFrame
# Built column-wise: each keyword names a column and supplies that column's
# values, one entry per row.

data = dict(
    Name=['Satvik', 'Ishita', 'Rajni'],
    Age=[19, 20, 50],
    City=['Chennai', 'Chennai', 'Delhi'],
)

df = pd.DataFrame(data=data)
print(df)
type(df)

     Name  Age     City
0  Satvik   19  Chennai
1  Ishita   20  Chennai
2   Rajni   50    Delhi


pandas.core.frame.DataFrame

In [8]:
# Convert the frame to a NumPy array with pandas' own conversion method.
# The columns mix strings and integers, so the resulting array has dtype=object.
df.to_numpy()

array([['Satvik', 19, 'Chennai'],
       ['Ishita', 20, 'Chennai'],
       ['Rajni', 50, 'Delhi']], dtype=object)

In [13]:
# Create data frame from a list of dictionaries (one dictionary per row;
# the dictionary keys become the column names)

data = [
    dict(Name='Satvik', Age=19, City='Chennai'),
    dict(Name='Ishita', Age=20, City='Chennai'),
    dict(Name='Rajni', Age=50, City='Delhi'),
]

df = pd.DataFrame(data=data)
print(df)

     Name  Age     City
0  Satvik   19  Chennai
1  Ishita   20  Chennai
2   Rajni   50    Delhi


In [15]:
# Selecting a single column by its label gives back a Series, not a DataFrame.
print(df['Name'])
type(df['Name'])

0    Satvik
1    Ishita
2     Rajni
Name: Name, dtype: object


pandas.core.series.Series

In [16]:
# .loc[0] selects the row whose index label is 0; a single row also comes
# back as a Series, indexed by the column names.
print(df.loc[0])
type(df.loc[0])

Name     Satvik
Age          19
City    Chennai
Name: 0, dtype: object


pandas.core.series.Series

In [17]:
# .at reads one cell, addressed by row label and column name.
df.at[1, 'Age']

np.int64(20)

In [28]:
# Row at position 1, every column from position 0 onward. Row and column
# selectors go into a single .iloc call rather than chaining a second []
# onto an intermediate Series.
df.iloc[1, 0:]

Name     Ishita
Age          20
City    Chennai
Name: 1, dtype: object

In [29]:
# Row labelled 1, columns from 'Age' through the last column. A single .loc
# call keeps both selectors label-based instead of mixing a label lookup
# with a positional slice on an intermediate Series.
df.loc[1, 'Age':]

Age          20
City    Chennai
Name: 1, dtype: object

In [30]:
# Display the frame as it currently stands.
df

Unnamed: 0,Name,Age,City
0,Satvik,19,Chennai
1,Ishita,20,Chennai
2,Rajni,50,Delhi


In [33]:
# Add a 'Salary' column; the list supplies one value per existing row.
df['Salary'] = [20_000, 2_000, 1_000_000]

In [34]:
# Display the frame again to confirm the 'Salary' column is present.
df

Unnamed: 0,Name,Age,City,Salary
0,Satvik,19,Chennai,20000
1,Ishita,20,Chennai,2000
2,Rajni,50,Delhi,1000000


In [46]:
# drop() hands back a new frame without the 'Salary' column; `df` itself is
# left as it was.
new_df = df.drop(columns='Salary')

In [47]:
# The original frame still has 'Salary': the drop was not applied in place.
df

Unnamed: 0,Name,Age,City,Salary
0,Satvik,19,Chennai,20000
1,Ishita,20,Chennai,2000
2,Rajni,50,Delhi,1000000


In [48]:
# The frame returned by drop() no longer has the 'Salary' column.
new_df

Unnamed: 0,Name,Age,City
0,Satvik,19,Chennai
1,Ishita,20,Chennai
2,Rajni,50,Delhi


In [49]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text columns are left out of the result.
df.describe()

Unnamed: 0,Age,Salary
count,3.0,3.0
mean,29.666667,340666.666667
std,17.61628,571070.340092
min,19.0,2000.0
25%,19.5,11000.0
50%,20.0,20000.0
75%,35.0,510000.0
max,50.0,1000000.0


In [59]:
# Merging and joining dataframes
# Two small frames sharing a 'Key' column: 'A' and 'B' occur in both,
# 'C' only in df1 and 'D' only in df2.

df1 = pd.DataFrame(data={'Key': list('ABC'), 'Value1': [1, 2, 3]})
df2 = pd.DataFrame(data={'Key': list('ABD'), 'Value2': [4, 5, 6]})

In [60]:
# Left-hand frame for the merge examples.
df1

Unnamed: 0,Key,Value1
0,A,1
1,B,2
2,C,3


In [61]:
# Right-hand frame for the merge examples.
df2

Unnamed: 0,Key,Value2
0,A,4
1,B,5
2,D,6


In [62]:
# Merge the data frames on the 'Key' column.
# Inner join: only keys present in both frames are kept.

df1.merge(df2, on="Key", how="inner")

Unnamed: 0,Key,Value1,Value2
0,A,1,4
1,B,2,5


In [63]:
# Outer join: keys from either frame are kept; cells with no match are
# filled with NaN.
df1.merge(df2, on="Key", how="outer")

Unnamed: 0,Key,Value1,Value2
0,A,1.0,4.0
1,B,2.0,5.0
2,C,3.0,
3,D,,6.0


In [66]:
# Left join: every key of df1 is kept; Value2 is NaN where df2 has no match.
df1.merge(df2, on="Key", how="left")

Unnamed: 0,Key,Value1,Value2
0,A,1,4.0
1,B,2,5.0
2,C,3,


In [67]:
# Right join: every key of df2 is kept; Value1 is NaN where df1 has no match.
df1.merge(df2, on="Key", how="right")

Unnamed: 0,Key,Value1,Value2
0,A,1.0,4
1,B,2.0,5
2,D,,6
