### Reading and Writing Data Files with Pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the file with read_table's default tab separator. The file is really
# comma-separated, so each line ends up in a single column (see the output of
# the next cell).
# The path uses a forward slash: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df = pd.read_table('Data/Data1.txt')

In [3]:
# With the default tab separator each comma-separated line stays in one column.
df

Unnamed: 0,"Anil,90,M"
0,"Ajay,89,M"
1,"Pooja,45,F"
2,"Ravi,18,M"
3,"Ram,29,M"
4,"Srav,35,F"


In [4]:
# sep=',' splits every line into columns, but the first data line
# ("Anil,90,M") is still consumed as the header row.
# Forward slash in the path: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df1 = pd.read_table('Data/Data1.txt', sep=',')
df1

Unnamed: 0,Anil,90,M
0,Ajay,89,M
1,Pooja,45,F
2,Ravi,18,M
3,Ram,29,M
4,Srav,35,F


In [5]:
# header=None tells pandas the file has no header line, so the first line is
# kept as data and the columns get the default labels 0, 1, 2.
# Forward slash in the path: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df2 = pd.read_table('Data/Data1.txt', sep=',', header=None)
df2

Unnamed: 0,0,1,2
0,Anil,90,M
1,Ajay,89,M
2,Pooja,45,F
3,Ravi,18,M
4,Ram,29,M
5,Srav,35,F


In [7]:
# header=None keeps the first line as data; names= supplies the column labels.
# Forward slash in the path: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df3 = pd.read_table(
    'Data/Data1.txt',
    sep=',',
    header=None,
    names=['Name', 'Marks', 'Gender'],
)
df3

Unnamed: 0,Name,Marks,Gender
0,Anil,90,M
1,Ajay,89,M
2,Pooja,45,F
3,Ravi,18,M
4,Ram,29,M
5,Srav,35,F


In [8]:
# Same read as above, but skiprows=[0, 2, 4] drops the 1st, 3rd and 5th lines
# of the file (0-based line numbers), leaving Ajay, Ravi and Srav.
# Forward slash in the path: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df4 = pd.read_table(
    'Data/Data1.txt',
    sep=',',
    header=None,
    names=['Name', 'Marks', 'Gender'],
    skiprows=[0, 2, 4],
)

In [9]:
# Only the lines that were not skipped remain: Ajay, Ravi and Srav.
df4

Unnamed: 0,Name,Marks,Gender
0,Ajay,89,M
1,Ravi,18,M
2,Srav,35,F


In [10]:
# Load the Excel workbook into a DataFrame.
# Forward slash in the path: '\D' is not a valid string escape, and a
# backslash separator only works on Windows.
df_excel = pd.read_excel('Data/Data2.xlsx')

In [11]:
# Show the frame that was loaded from the Excel workbook.
df_excel

Unnamed: 0,Name,Marks,Gender
0,Anil,90,M
1,Pooja,76,F
2,Ram,45,M
3,Ajay,88,M
4,Srav,76,F


In [13]:
# Export the Excel-sourced frame as CSV. With default arguments to_csv also
# writes the row index as an unnamed first column.
df_excel.to_csv(path_or_buf='Data/new_data_From_Pandas.csv')

### Missing Data in Pandas

In [14]:
# Small Series of names with one missing entry (np.nan) at position 1.
s = pd.Series(data=['Sam', np.nan, 'Tim', 'Tom'])

In [15]:
# The np.nan entry is displayed as NaN.
s

0    Sam
1    NaN
2    Tim
3    Tom
dtype: object

In [16]:
# Boolean mask: True where a value is missing (isnull() is an alias of isna()).
s.isna()

0    False
1     True
2    False
3    False
dtype: bool

In [17]:
# Inverse mask: True where a value is present (notnull() is an alias of notna()).
s.notna()

0     True
1    False
2     True
3     True
dtype: bool

In [18]:
# Overwrite the entry labelled 3 with None to show that None also counts as missing.
s.loc[3] = None

In [19]:
# Index 3 now holds None, shown alongside the earlier NaN at index 1.
s

0     Sam
1     NaN
2     Tim
3    None
dtype: object

In [20]:
# Both NaN (index 1) and None (index 3) are reported as missing.
# isnull() is an alias of isna().
s.isna()

0    False
1     True
2    False
3     True
dtype: bool

In [21]:
# Return a copy of the Series without its missing entries (NaN and None);
# s itself is not modified.
s.dropna(axis=0)

0    Sam
2    Tim
dtype: object

In [22]:
# s is unchanged: the dropna() call above returned a new Series instead of
# modifying s in place.
s

0     Sam
1     NaN
2     Tim
3    None
dtype: object

In [23]:
# 3x3 frame: row 0 is complete, row 1 has one gap, row 2 is entirely missing.
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, np.nan, 5],
        [np.nan, np.nan, np.nan],
    ]
)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,5.0
2,,,


In [24]:
# Element-wise mask of missing cells (isnull() is an alias of isna()).
df.isna()

Unnamed: 0,0,1,2
0,False,False,False
1,False,True,False
2,True,True,True


In [27]:
# Keep only the rows that have no missing value at all; the result goes into
# res and df is left as it was.
res = df.dropna(axis=0, how='any')

In [26]:
# df still has all three rows: dropna() returned a new frame (kept in res).
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,5.0
2,,,


In [28]:
# Only row 0 had no missing values, so it is the only row left.
res

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [29]:
# how='all' drops a row only when every value in it is missing, so row 1
# (one gap) is kept and row 2 (all gaps) is removed.
df.dropna(axis=0, how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,5.0


In [30]:
# Replace column 1 with None in every row so that the whole column is missing.
# NOTE(review): the later fillna(0) output shows 0 rather than 0.0 for this
# column, which suggests it is no longer a float column — presumably object dtype.
df[1] = None

In [31]:
# Column 1 is now missing in every row.
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,5.0
2,,,


In [33]:
# Work along columns instead of rows: drop each column whose values are all
# missing (here column 1).
df.dropna(axis='columns', how='all')

Unnamed: 0,0,2
0,1.0,3.0
1,4.0,5.0
2,,


In [34]:
# df still contains column 1: the dropna call above did not modify df.
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,5.0
2,,,


In [35]:
# Replace every missing cell with the constant 0 (returns a new frame).
df.fillna(value=0)

Unnamed: 0,0,1,2
0,1.0,0,3.0
1,4.0,0,5.0
2,0.0,0,0.0


In [36]:
# A dict gives a separate fill value per column: column label -> replacement.
df.fillna(value={0: 199, 1: 299, 2: 399})

Unnamed: 0,0,1,2
0,1.0,299,3.0
1,4.0,299,5.0
2,199.0,299,399.0


In [37]:
# Fill each column's gaps with that column's mean. Column 1 has no values at
# all, so it has no mean to fill with and stays missing.
df.fillna(value=df.mean(axis=0))

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,5.0
2,2.5,,4.0


In [43]:
# df still has its gaps: none of the fillna results above were assigned back to df.
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,5.0
2,,,


### Data Transformations in Pandas

In [45]:
# Six-row frame in which the (a, b) pairs of rows 2 and 3 repeat in rows 4 and 5.
data = pd.DataFrame(
    data={
        'a': ['one', 'two', 'one', 'two', 'one', 'two'],
        'b': [1, 1, 2, 3, 2, 3],
    }
)

In [46]:
# Rows 4 and 5 repeat the (a, b) values of rows 2 and 3.
data

Unnamed: 0,a,b
0,one,1
1,two,1
2,one,2
3,two,3
4,one,2
5,two,3


In [47]:
# Flag rows that repeat an earlier row; the first occurrence is not flagged.
data.duplicated(keep='first')

0    False
1    False
2    False
3    False
4     True
5     True
dtype: bool

In [48]:
# Return a copy without the repeated rows, keeping each first occurrence.
data.drop_duplicates(keep='first')

Unnamed: 0,a,b
0,one,1
1,two,1
2,one,2
3,two,3


In [49]:
# Add a column of distinct values 0..5 (modifies data in place), so that no two
# complete rows are identical any more.
data['c'] = range(0, 6)
data

Unnamed: 0,a,b,c
0,one,1,0
1,two,1,1
2,one,2,2
3,two,3,3
4,one,2,4
5,two,3,5


In [50]:
# Compare on columns a and b only; keep='last' flags the earlier occurrences
# (rows 2 and 3) and leaves the last ones (rows 4 and 5) unflagged.
data.duplicated(subset=['a', 'b'], keep='last')

0    False
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [51]:
# Five students with their marks; df is re-bound here to a new frame.
df = pd.DataFrame(
    data={
        'names': ['Tim', 'Tom', 'Kate', 'Sam', 'Kim'],
        'marks': [50, 60, 50, 60, 70],
    }
)


In [52]:
# Show the names/marks frame.
df

Unnamed: 0,names,marks
0,Tim,50
1,Tom,60
2,Kate,50
3,Sam,60
4,Kim,70


In [53]:
# Lookup table: student name -> class label.
classes = dict(Tim='A', Tom='A', Sam='B', Kate='B', Kim='B')

In [54]:
# Show the name -> class lookup dictionary.
classes

{'Tim': 'A', 'Tom': 'A', 'Sam': 'B', 'Kate': 'B', 'Kim': 'B'}

In [55]:
# Select the 'names' column as a Series.
df.loc[:, 'names']

0     Tim
1     Tom
2    Kate
3     Sam
4     Kim
Name: names, dtype: object

In [56]:
# Lower-case every name through the .str accessor; the result is only
# displayed, it is not stored back into df.
df['names'].str.lower()

0     tim
1     tom
2    kate
3     sam
4     kim
Name: names, dtype: object

In [60]:
# Put the names into "Capitalized" form (the same form the keys of `classes`
# use) and keep the result in n for the lookup in the next cell.
n = df['names'].str.capitalize()

In [61]:
# Look up each name in the classes dict and store the result as a new
# 'branches' column (this modifies df in place).
df['branches'] = n.map(classes)

In [62]:
# df now carries the mapped 'branches' column next to names and marks.
df

Unnamed: 0,names,marks,branches
0,Tim,50,A
1,Tom,60,A
2,Kate,50,B
3,Sam,60,B
4,Kim,70,B
