# Pandas - DataFrames and Series
***
Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: `Series` and `DataFrame`. A `Series` is a 1-D array-like object, while a `DataFrame` is a 2-D, size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [2]:
import pandas as pd

## Series
## A pandas Series is a one-dimensional, array-like object that can hold any
## data type. Conceptually it resembles a single column of a table.

data = list(range(1, 6))
series = pd.Series(data)
# Show the Series itself, then its Python type.
for shown in (series, type(series)):
    print(shown)

0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [3]:
## Build a Series from a dictionary:
## the keys become the index labels and the values become the data.
data = dict(a=1, b=2, c=3)
series = pd.Series(data)
print(series)

a    1
b    2
c    3
dtype: int64


In [4]:
## Series with a custom index
## The index labels can be supplied explicitly as a list, one label per value.
data = [10, 20, 30]
index = list("sum")  # -> ['s', 'u', 'm']
pd.Series(data=data, index=index)


s    10
u    20
m    30
dtype: int64

In [None]:
## DataFrame
## A DataFrame is two-dimensional: multiple rows and multiple columns.
## Here it is built from a dictionary of lists, where each key is a column
## name and each list holds that column's values.
data = dict(
    Name=["Sumeet", "Vidura", "Dhaumuya"],
    Age=[25, 46, 45],
    City=["Nagpur", "Hastinapur", "Mithila"],
)

df = pd.DataFrame(data)
# Print the frame followed by its Python type, one per line.
print(df, type(df), sep="\n")

       Name  Age        City
0    Sumeet   25      Nagpur
1    Vidura   46  Hastinapur
2  Dhaumuya   45     Mithila
<class 'pandas.core.frame.DataFrame'>


In [6]:
import numpy as np
# Wrap `data` in a NumPy array and print it.
# NOTE(review): in the recorded output `data` is still a plain dictionary, so
# the printed array shows the dictionary itself rather than a 2-D table.
# If a tabular array was intended, converting the DataFrame (e.g. `df.to_numpy()`)
# is presumably what was wanted -- confirm.
arr = np.array(data)
print(arr)

{'Name': ['Sumeet', 'Vidura', 'Dhaumuya'], 'Age': [25, 46, 45], 'City': ['Nagpur', 'Hastinapur', 'Mithila']}


In [None]:
## Build a DataFrame from a list of dictionaries:
## every dictionary is one row, and its keys are the column names.
records = [
    {"Name": "Sumeet", "Age": 25, "City": "Nagpur"},  # row 1
    {"Name": "Sumee", "Age": 26, "City": "agpur"},    # row 2
    {"Name": "Sume", "Age": 27, "City": "gpur"},      # row 3
    {"Name": "Sum", "Age": 28, "City": "pur"},        # row 4
]
data = pd.DataFrame(records)
# Print the frame followed by its Python type, one per line.
print(data, type(data), sep="\n")


     Name  Age    City
0  Sumeet   25  Nagpur
1   Sumee   26   agpur
2    Sume   27    gpur
3     Sum   28     pur
<class 'pandas.core.frame.DataFrame'>


In [8]:
## Load a CSV file into a DataFrame and preview it:
## `head()` shows the first five rows and `tail()` the last five (five is the default).
df = pd.read_csv("Details.csv")
# The empty string in the middle produces the blank line between the two previews.
print(df.head(), "", df.tail(), sep="\n")

  Order ID  Amount  Profit  Quantity     Category      Sub-Category  \
0  B-25681    1096     658         7  Electronics  Electronic Games   
1  B-26055    5729      64        14    Furniture            Chairs   
2  B-25955    2927     146         8    Furniture         Bookcases   
3  B-26093    2847     712         8  Electronics          Printers   
4  B-25602    2617    1151         4  Electronics            Phones   

   PaymentMode  
0          COD  
1          EMI  
2          EMI  
3  Credit Card  
4  Credit Card  

     Order ID  Amount  Profit  Quantity     Category Sub-Category PaymentMode
1495  B-25700       7      -3         2     Clothing  Hankerchief         COD
1496  B-25757    3151     -35         7     Clothing     Trousers         EMI
1497  B-25973    4141    1698        13  Electronics     Printers         COD
1498  B-25698       7      -2         1     Clothing  Hankerchief         COD
1499  B-25993    4363     305         5    Furniture       Tables         EMI


In [9]:
## Build a DataFrame from a list of dictionaries (one dictionary per row),
## then select a single column: indexing with one column name yields a Series.
records = [
    {"Name": "Sumeet", "Age": 25, "City": "Nagpur"},
    {"Name": "Sumee", "Age": 26, "City": "agpur"},
    {"Name": "Sume", "Age": 27, "City": "gpur"},
    {"Name": "Sum", "Age": 28, "City": "pur"},
]
data = pd.DataFrame(records)
name_column = data["Name"]
# Show the frame and its type, then the selected column and its type.
for shown in (data, type(data), name_column, type(name_column)):
    print(shown)



     Name  Age    City
0  Sumeet   25  Nagpur
1   Sumee   26   agpur
2    Sume   27    gpur
3     Sum   28     pur
<class 'pandas.core.frame.DataFrame'>
0    Sumeet
1     Sumee
2      Sume
3       Sum
Name: Name, dtype: object
<class 'pandas.core.series.Series'>


In [24]:
## `.loc[...]` is an indexer (square brackets, not a function call) that
## selects by index *label*: `data.loc[0]` returns the row whose label is 0,
## and if a row were labelled 'a' it would be fetched with `data.loc['a']`.
## A single cell is addressed as `.loc[row_label, column_name]` in one step,
## which avoids chained indexing such as `data.loc[0]['City']`.
print(data.loc[0])
print()
print(data.loc[0, "City"])


Name    Sumeet
Age         25
City    Nagpur
Name: 0, dtype: object

Nagpur


In [33]:
## `.iloc[...]` selects purely by integer *position*, ignoring the index labels:
## `.iloc[0]` is always the first row, whatever label that row carries.
## `.iloc[row_position, column_position]` picks out a single cell.
last_row = data.iloc[3]
first_cell = data.iloc[0, 0]
# The empty string in the middle produces the blank line between the two results.
print(last_row, "", first_cell, sep="\n")

Name    Sum
Age      28
City    pur
Name: 3, dtype: object

Sumeet


In [21]:
# Display the current contents of `data` (a bare last expression is rendered by the notebook).
data

Unnamed: 0,Name,Age,City
0,Sumeet,25,Nagpur
1,Sumee,26,agpur
2,Sume,27,gpur
3,Sum,28,pur


In [22]:
## `.at[row_label, 'column_name']` reads one single element of the DataFrame,
## addressed by the row's index label and the column's name.
row_label, column_name = 2, "City"
data.at[row_label, column_name]


'gpur'

In [None]:
## `.iat[row_position, column_position]` reads one single element of the DataFrame,
## addressed by integer positions for both the row and the column.
row_position, column_position = 3, 2
data.iat[row_position, column_position]

'pur'

### Data manipulation with dataframe

In [35]:
# Display `data` as the starting point for the manipulation steps that follow.
data

Unnamed: 0,Name,Age,City
0,Sumeet,25,Nagpur
1,Sumee,26,agpur
2,Sume,27,gpur
3,Sum,28,pur


In [37]:
### Add a new column to the DataFrame:
### assigning a list to a column label that does not exist yet creates that column.
salaries = [10000, 20000, 30000, 40000]
data["Salary"] = salaries

In [38]:
# Display `data` to confirm the "Salary" column is now present.
data

Unnamed: 0,Name,Age,City,Salary
0,Sumeet,25,Nagpur,10000
1,Sumee,26,agpur,20000
2,Sume,27,gpur,30000
3,Sum,28,pur,40000


In [44]:
## Removing rows or columns with the `DataFrame.drop` method
## - `drop(labels, axis=0)` removes rows by index label (axis=0 is the default).
## - `drop(labels, axis=1)`, or equivalently `drop(columns=...)`, removes columns.
## - `drop` returns a new DataFrame and leaves the original unchanged unless
##   `inplace=True` is passed (its default is False).
## Assigning the returned frame back to `data` makes the removal stick without
## mutating the existing object in place.
data = data.drop(columns="Salary")


In [45]:
# Display `data` to confirm the "Salary" column has been removed.
data

Unnamed: 0,Name,Age,City
0,Sumeet,25,Nagpur
1,Sumee,26,agpur
2,Sume,27,gpur
3,Sum,28,pur


In [47]:
### Increment every value in the "Age" column by one (applied element-wise).
### The column is overwritten with the result, so running this cell again
### adds one more each time.
data["Age"] = data["Age"].add(1)

In [48]:
# Display `data` to confirm every "Age" value has increased by one.
data

Unnamed: 0,Name,Age,City
0,Sumeet,26,Nagpur
1,Sumee,27,agpur
2,Sume,28,gpur
3,Sum,29,pur
