# Pandas Fundamentals Part - 1


# Pandas Series


In [34]:
import pandas as pd
import numpy as np
# Build a Series straight from a Python list. No index is supplied, so pandas
# labels the entries with the default integer index 0..n-1.
s = pd.Series(data=['a', 'b', 'c', 'd'])
print(s)

0    a
1    b
2    c
3    d
dtype: object


In [35]:
# Confirm that `s` is a pandas Series object.
type(s)

pandas.core.series.Series

## Series from numpy array

In [36]:
import pandas as pd
import numpy as np
# Wrap a one-dimensional NumPy array in a Series; the default integer
# index 0..n-1 is assigned because none is given.
data = np.array(['a', 'b', 'c', 'd'])
s = pd.Series(data=data)
print(s)

0    a
1    b
2    c
3    d
dtype: object


In [37]:
# A Series built from a NumPy array is still a pandas Series.
type(s)

pandas.core.series.Series

In [38]:
import pandas as pd
import numpy as np
# Same values as before, but with explicit integer labels instead of the
# default 0..n-1 index. One label is given per value.
data = np.array(['a', 'b', 'c', 'd'])
labels = [100, 101, 102, 103]
s = pd.Series(data, index=labels)
print(s)

100    a
101    b
102    c
103    d
dtype: object


## Series from Dictionary

In [39]:
import pandas as pd
import numpy as np
# A dict maps naturally onto a Series: keys become the index labels and
# values become the data.
data = {'a': 0.0, 'b': 1.0, 'c': 2.0}
s = pd.Series(data=data)
print(s)

a    0.0
b    1.0
c    2.0
dtype: float64


In [40]:
import pandas as pd
import numpy as np
# Passing an index alongside a dict selects and orders the entries by label.
# 'd' has no matching key in the dict, so its value is NaN.
data = {'a': 0.0, 'b': 1.0, 'c': 2.0}
wanted_labels = ['b', 'c', 'd', 'a']
s = pd.Series(data, index=wanted_labels)
print(s)

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64


# Pandas DataFrame

## Dataframe from List

In [41]:
import pandas as pd

# A flat list of strings becomes a one-column frame: one row per element,
# with the single column labelled 0.
lis = [
    'USA', 'covid', 'cases', 'are',
    '2.56m', 'and', 'counting',
]
df = pd.DataFrame(data=lis)
df

Unnamed: 0,0
0,USA
1,covid
2,cases
3,are
4,2.56m
5,and
6,counting


In [42]:
# Confirm that `df` is a pandas DataFrame object.
type(df)

pandas.core.frame.DataFrame

## DataFrame from Dictionary

In [43]:
import pandas as pd
 

# Each key is a column name and each list supplies that column's values.
# The three lists all have four entries, giving a 4-row frame.
data = {
    'Name': ['Tom', 'Nick', 'Anne', 'Jack'],
    'Age': [20, 21, 19, 18],
    'Gender': ['M', 'M', 'F', 'M'],
}

# Build the frame; rows get the default integer index 0..3.
df = pd.DataFrame(data=data)

df

Unnamed: 0,Name,Age,Gender
0,Tom,20,M
1,Nick,21,M
2,Anne,19,F
3,Jack,18,M


## Printing a column in a DataFrame

In [44]:
# Bracket indexing with a column label returns that column as a Series.
df['Name']

0     Tom
1    Nick
2    Anne
3    Jack
Name: Name, dtype: object

In [45]:
# Attribute access returns the same column as df['Name']. It only works when
# the column name is a valid Python identifier that does not clash with an
# existing DataFrame attribute or method; bracket indexing always works.
df.Name

0     Tom
1    Nick
2    Anne
3    Jack
Name: Name, dtype: object

## Add Rows and Columns in Dataframe 

In [46]:
# Show the frame as it stands before any rows or columns are added.
df

Unnamed: 0,Name,Age,Gender
0,Tom,20,M
1,Nick,21,M
2,Anne,19,F
3,Jack,18,M


## Add a row in dataframe 

In [47]:
# New record to add; its keys match df's existing column names.
df2 = {'Name': 'Amy', 'Age': 20, 'Gender': "F"}
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so wrap the record in a one-row frame and concatenate it instead.
# ignore_index=True renumbers the result 0..n-1, so the new row receives the
# next integer label rather than keeping the one-row frame's label 0.
# Note: re-running this cell adds the same row again.
df = pd.concat([df, pd.DataFrame([df2])], ignore_index=True)

In [48]:
# Amy now appears as the last row, at index 4.
df

Unnamed: 0,Name,Age,Gender
0,Tom,20,M
1,Nick,21,M
2,Anne,19,F
3,Jack,18,M
4,Amy,20,F


## Add a column in dataframe 

#### We want to add a column named `Hobby`

In [49]:
# One hobby per existing row, in row order; assigning a list to a new column
# label requires exactly as many entries as the frame has rows.
hobbies = ["Singing", "Dancing", "Studying", "Gardening", "BGMI"]
df['Hobby'] = hobbies

In [50]:
# Hobby now appears as the last column.
df

Unnamed: 0,Name,Age,Gender,Hobby
0,Tom,20,M,Singing
1,Nick,21,M,Dancing
2,Anne,19,F,Studying
3,Jack,18,M,Gardening
4,Amy,20,F,BGMI


## Copying the DataFrame, and then the Age column, and storing the copy in `a`

In [51]:
# Take an independent (deep) copy of the whole frame, so that later changes
# to df do not show up in `a`.
a = df.copy(deep=True)

In [52]:
# Remove the Hobby column from df itself (inplace=True mutates df and returns
# None). The copy stored in `a` is a separate object and is not affected.
df.drop(columns=["Hobby"], inplace=True)

In [53]:
# Hobby is gone from df after the in-place drop.
df

Unnamed: 0,Name,Age,Gender
0,Tom,20,M
1,Nick,21,M
2,Anne,19,F
3,Jack,18,M
4,Amy,20,F


In [54]:
# `a` still has the Hobby column: it is a copy taken before the drop.
a

Unnamed: 0,Name,Age,Gender,Hobby
0,Tom,20,M,Singing
1,Nick,21,M,Dancing
2,Anne,19,F,Studying
3,Jack,18,M,Gardening
4,Amy,20,F,BGMI


In [55]:
# Repeat display of df: still Name, Age and Gender only.
df

Unnamed: 0,Name,Age,Gender
0,Tom,20,M
1,Nick,21,M
2,Anne,19,F
3,Jack,18,M
4,Amy,20,F


In [56]:
# Repeat display of `a`: the copy still includes Hobby.
a

Unnamed: 0,Name,Age,Gender,Hobby
0,Tom,20,M,Singing
1,Nick,21,M,Dancing
2,Anne,19,F,Studying
3,Jack,18,M,Gardening
4,Amy,20,F,BGMI


In [57]:
# Rebind `a` to an independent copy of just the Age column (a Series),
# replacing the full-frame copy it held before.
a = df['Age'].copy()

In [58]:
# `a` is now a Series holding the Age values.
a

0    20
1    21
2    19
3    18
4    20
Name: Age, dtype: int64

# Dataset reading

In [61]:
import pandas as pd
# Load the comma-separated file (path is relative to the working directory)
# and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer='csv.csv')
df.head(n=10)

Unnamed: 0,UniqueID,disbursed_amount,asset_cost,ltv,branch_id,supplier_id,manufacturer_id,Current_pincode_ID,Date.of.Birth,Employment.Type,...,SEC.SANCTIONED.AMOUNT,SEC.DISBURSED.AMOUNT,PRIMARY.INSTAL.AMT,SEC.INSTAL.AMT,NEW.ACCTS.IN.LAST.SIX.MONTHS,DELINQUENT.ACCTS.IN.LAST.SIX.MONTHS,AVERAGE.ACCT.AGE,CREDIT.HISTORY.LENGTH,NO.OF_INQUIRIES,loan_default
0,420825,50578,58400,89.55,67,22807,45,1441,01-01-84,Salaried,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
1,537409,47145,65550,73.23,67,22807,45,1502,31-07-85,Self employed,...,0,0,1991,0,0,1,1yrs 11mon,1yrs 11mon,0,1
2,417566,53278,61360,89.63,67,22807,45,1497,24-08-85,Self employed,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
3,624493,57513,66113,88.48,67,22807,45,1501,30-12-93,Self employed,...,0,0,31,0,0,0,0yrs 8mon,1yrs 3mon,1,1
4,539055,52378,60300,88.39,67,22807,45,1495,09-12-77,Self employed,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,1,1
5,518279,54513,61900,89.66,67,22807,45,1501,08-09-90,Self employed,...,0,0,1347,0,0,0,1yrs 9mon,2yrs 0mon,0,0
6,529269,46349,61500,76.42,67,22807,45,1502,01-06-88,Salaried,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
7,510278,43894,61900,71.89,67,22807,45,1501,04-10-89,Salaried,...,0,0,0,0,0,0,0yrs 2mon,0yrs 2mon,0,0
8,490213,53713,61973,89.56,67,22807,45,1497,15-11-91,Self employed,...,0,0,0,0,0,0,4yrs 8mon,4yrs 8mon,1,0
9,510980,52603,61300,86.95,67,22807,45,1492,01-06-68,Salaried,...,0,0,2608,0,0,0,1yrs 7mon,1yrs 7mon,0,0


In [62]:
# read_csv also handles tab-separated files: pass the tab character as the
# field separator (`sep`; `delimiter` is an alias for the same option).
df = pd.read_csv('tsv.txt', sep='\t')
df.head()

Unnamed: 0,UniqueID,disbursed_amount,asset_cost,ltv,branch_id,supplier_id,manufacturer_id,Current_pincode_ID,Date.of.Birth,Employment.Type,...,SEC.SANCTIONED.AMOUNT,SEC.DISBURSED.AMOUNT,PRIMARY.INSTAL.AMT,SEC.INSTAL.AMT,NEW.ACCTS.IN.LAST.SIX.MONTHS,DELINQUENT.ACCTS.IN.LAST.SIX.MONTHS,AVERAGE.ACCT.AGE,CREDIT.HISTORY.LENGTH,NO.OF_INQUIRIES,loan_default
0,420825,50578,58400,89.55,67,22807,45,1441,01-01-84,Salaried,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
1,537409,47145,65550,73.23,67,22807,45,1502,31-07-85,Self employed,...,0,0,1991,0,0,1,1yrs 11mon,1yrs 11mon,0,1
2,417566,53278,61360,89.63,67,22807,45,1497,24-08-85,Self employed,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0


In [63]:
# The `pickle` module is not referenced directly in this cell;
# pd.read_pickle is called through pandas.
import pickle


# NOTE(security): read_pickle unpickles the file, and unpickling can execute
# arbitrary code. Only load pickle files that come from a source you trust.
df = pd.read_pickle('pickle.pkl')
df.head()

Unnamed: 0,UniqueID,disbursed_amount,asset_cost,ltv,branch_id,supplier_id,manufacturer_id,Current_pincode_ID,Date.of.Birth,Employment.Type,...,SEC.SANCTIONED.AMOUNT,SEC.DISBURSED.AMOUNT,PRIMARY.INSTAL.AMT,SEC.INSTAL.AMT,NEW.ACCTS.IN.LAST.SIX.MONTHS,DELINQUENT.ACCTS.IN.LAST.SIX.MONTHS,AVERAGE.ACCT.AGE,CREDIT.HISTORY.LENGTH,NO.OF_INQUIRIES,loan_default
0,420825,50578,58400,89.55,67,22807,45,1441,1984-01-01,Salaried,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
1,537409,47145,65550,73.23,67,22807,45,1502,1985-07-31,Self employed,...,0,0,1991,0,0,1,1yrs 11mon,1yrs 11mon,0,1
2,417566,53278,61360,89.63,67,22807,45,1497,1985-08-24,Self employed,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,0,0
3,624493,57513,66113,88.48,67,22807,45,1501,1993-12-30,Self employed,...,0,0,31,0,0,0,0yrs 8mon,1yrs 3mon,1,1
4,539055,52378,60300,88.39,67,22807,45,1495,1977-12-09,Self employed,...,0,0,0,0,0,0,0yrs 0mon,0yrs 0mon,1,1


# Slicing

In [64]:
import pandas as pd
# Load the Boston housing CSV used for the slicing examples and preview the
# first five rows (the default for head()).
df = pd.read_csv(filepath_or_buffer="boston.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


## Select the rows at positions 1 to 3 (the second through fourth rows)

In [65]:
# A plain slice on a DataFrame selects rows by position, and the stop is
# exclusive: this returns the rows at positions 1, 2 and 3 (row 0 is skipped).
df[1:4]

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4


## Select the last four rows in the df


In [66]:

# A negative start counts from the end: this returns the last four rows.
df[-4:]

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
502,503,0.04527,0.0,11.93,0,0.573,6.12,76.7,2.2875,1,273,21.0,396.9,9.08,20.6
503,504,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.9,5.64,23.9
504,505,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0
505,506,0.04741,0.0,11.93,0,0.573,6.03,80.8,2.505,1,273,21.0,396.9,7.88,11.9


In [69]:
# .loc is label-based and its slice includes BOTH endpoints, so 0:2 returns
# the rows labelled 0, 1 and 2, restricted to the three named columns.
df.loc[0:2, ['crim', 'age', 'rad']]

Unnamed: 0,crim,age,rad
0,0.00632,65.2,1
1,0.02731,78.9,2
2,0.02729,61.1,2


In [70]:
# .iloc is position-based with an exclusive stop: rows at positions 0-4 and
# the last two columns (-2: counts from the end).
df.iloc[0:5, -2:]

Unnamed: 0,lstat,medv
0,4.98,24.0
1,9.14,21.6
2,4.03,34.7
3,2.94,33.4
4,5.33,36.2


## loc gets rows (and/or columns) with particular labels.

## iloc gets rows (and/or columns) at integer locations.