# Pandas - DataFrame and Series
Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional, array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [1]:
! pip install pandas



In [12]:
import pandas as pd


In [13]:
# Series
# A pandas Series is a one-dimensional, array-like object that can hold any
# data type. It is similar to a single column in a table.
data = list(range(1, 6))
series = pd.Series(data=data)

# Show the values with their default integer index, then the object's type.
print("Series\n", series)
print(type(series))

Series
 0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [14]:
# Create a Series from a dictionary: the keys become the index labels and
# the values become the data.
data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data=data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [15]:
# Series with a custom index: pass explicit labels instead of relying on
# the default 0..n-1 integer index.
data = [10, 20, 30]
index = list('abc')

pd.Series(data=data, index=index)

a    10
b    20
c    30
dtype: int64

In [16]:
# DataFrame
# Create a DataFrame from a dictionary of lists: each key is a column name
# and each list holds that column's values, one entry per row.
data = dict(
    Name=['Abhishek', 'Jack', 'John'],
    Age=[25, 30, 45],
    City=['Bangalore', 'New York', 'Florida'],
)

df = pd.DataFrame(data=data)
print(df)
print(type(df))

       Name  Age       City
0  Abhishek   25  Bangalore
1      Jack   30   New York
2      John   45    Florida
<class 'pandas.core.frame.DataFrame'>


In [17]:
# Create a DataFrame from a list of dictionaries: each dictionary is one row
# and its keys become the column names.
data = [
    {'Name': person, 'age': 32, 'City': 'Bangalore'}
    for person in ('Abhishek', 'John', 'Tony', 'Jack')
]

df = pd.DataFrame(data=data)
print(df)

       Name  age       City
0  Abhishek   32  Bangalore
1      John   32  Bangalore
2      Tony   32  Bangalore
3      Jack   32  Bangalore


In [27]:
# Load the sales data from a CSV file located in the working directory.
df = pd.read_csv('SalesData.csv')

# First five rows (five is the default for head()).
df.head()

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
0,B-25681,1096,658,7,Electronics,Electronic Games,COD
1,B-26055,5729,64,14,Furniture,Chairs,EMI
2,B-25955,2927,146,8,Furniture,Bookcases,EMI
3,B-26093,2847,712,8,Electronics,Printers,Credit Card
4,B-25602,2617,1151,4,Electronics,Phones,Credit Card


In [28]:
# Show the final five rows of the DataFrame.
df.tail(n=5)

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI
1497,B-25973,4141,1698,13,Electronics,Printers,COD
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD
1499,B-25993,4363,305,5,Furniture,Tables,EMI


In [31]:
#  Accessing data from dataframe
# Rebuild the small example frame from `data`, replacing the sales DataFrame
# that `df` was bound to by the read_csv cell. The output below shows the
# Name/age/City rows, i.e. `data` is still the list of dictionaries.
df=pd.DataFrame(data)
df

Unnamed: 0,Name,age,City
0,Abhishek,32,Bangalore
1,John,32,Bangalore
2,Tony,32,Bangalore
3,Jack,32,Bangalore


In [32]:
# Select a single column by its name; the result is a pandas Series.
df['Name']

0    Abhishek
1        John
2        Tony
3        Jack
Name: Name, dtype: object

In [33]:
type(df['Name'])

pandas.core.series.Series

In [58]:
df.loc[0]               # label-based lookup: the row whose index label is 0

Name     Abhishek
age            32
City    Bangalore
Name: 0, dtype: object

In [59]:
# Position-based lookup: the first row, regardless of its index label.
df.iloc[0]

Name     Abhishek
age            32
City    Bangalore
Name: 0, dtype: object

In [60]:
# Read one cell by row label and column label in a single .loc call.
# Chaining df.loc[0][0] indexes the row Series with an integer key that is
# not one of its labels; pandas then falls back to positional access, a
# behaviour that is deprecated and emits a FutureWarning.
df.loc[0, 'Name']

  df.loc[0][0]


'Abhishek'

In [61]:
# Read one cell by row position and column position in a single .iloc call.
# Chaining df.iloc[0][1] applies an integer key to a Series labelled by
# column names, relying on a positional fallback that is deprecated and
# emits a FutureWarning.
df.iloc[0, 1]

  df.iloc[0][1]


32

In [62]:
## Accessing Specified Element

df

Unnamed: 0,Name,age,City
0,Abhishek,32,Bangalore
1,John,32,Bangalore
2,Tony,32,Bangalore
3,Jack,32,Bangalore


In [63]:
# Accessing a specified element using at: row label 1, column label 'Name'.
df.at[1,'Name']

'John'

In [64]:
# Accessing specified element using iat
# iat takes integer positions: row position 2, column position 0.
df.iat[2,0]

'Tony'

In [65]:
# Data Manipulation with Data Frames
df

Unnamed: 0,Name,age,City
0,Abhishek,32,Bangalore
1,John,32,Bangalore
2,Tony,32,Bangalore
3,Jack,32,Bangalore


In [66]:
# Add a new column by assigning a list with one value per existing row.
df['Salary'] = [10_000, 20_000, 30_000, 40_000]

In [67]:
df

Unnamed: 0,Name,age,City,Salary
0,Abhishek,32,Bangalore,10000
1,John,32,Bangalore,20000
2,Tony,32,Bangalore,30000
3,Jack,32,Bangalore,40000


In [68]:
# Remove the Salary column; inplace=True modifies df itself instead of
# returning a new DataFrame.
df.drop(columns='Salary', inplace=True)

In [69]:
df

Unnamed: 0,Name,age,City
0,Abhishek,32,Bangalore
1,John,32,Bangalore
2,Tony,32,Bangalore
3,Jack,32,Bangalore


In [71]:
# Increase every value in the age column by 1 (applied element-wise).
df['age'] += 1
df

Unnamed: 0,Name,age,City
0,Abhishek,33,Bangalore
1,John,33,Bangalore
2,Tony,33,Bangalore
3,Jack,33,Bangalore


In [72]:
df.drop(0)         # returns a new DataFrame without the row labelled 0; df itself is unchanged

Unnamed: 0,Name,age,City
1,John,33,Bangalore
2,Tony,33,Bangalore
3,Jack,33,Bangalore


In [73]:
df

Unnamed: 0,Name,age,City
0,Abhishek,33,Bangalore
1,John,33,Bangalore
2,Tony,33,Bangalore
3,Jack,33,Bangalore


In [74]:
# Remove the row labelled 0 from df itself (inplace=True makes it permanent).
df.drop(index=0, inplace=True)

In [75]:
df

Unnamed: 0,Name,age,City
1,John,33,Bangalore
2,Tony,33,Bangalore
3,Jack,33,Bangalore


In [76]:
# Rebind df to the sales data read from the CSV file.
df = pd.read_csv('SalesData.csv')

In [77]:
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
0,B-25681,1096,658,7,Electronics,Electronic Games,COD
1,B-26055,5729,64,14,Furniture,Chairs,EMI
2,B-25955,2927,146,8,Furniture,Bookcases,EMI
3,B-26093,2847,712,8,Electronics,Printers,Credit Card
4,B-25602,2617,1151,4,Electronics,Phones,Credit Card
...,...,...,...,...,...,...,...
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI
1497,B-25973,4141,1698,13,Electronics,Printers,COD
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD


In [78]:
# Summary statistics (count, mean, std, min, quartiles, max); the output
# below covers the Amount, Profit and Quantity columns.
df.describe()

Unnamed: 0,Amount,Profit,Quantity
count,1500.0,1500.0,1500.0
mean,291.847333,24.642,3.743333
std,461.92462,168.55881,2.184942
min,4.0,-1981.0,1.0
25%,47.75,-12.0,2.0
50%,122.0,8.0,3.0
75%,326.25,38.0,5.0
max,5729.0,1864.0,14.0
