In [1]:
import numpy as np
import pandas as pd

In [2]:
# Two core data structures in pandas:
# Series - a one-dimensional, labelled sequence of data
# DataFrame - two-dimensional, tabular data

In [3]:
# Build a Series from a plain Python list; pandas supplies the default 0..n-1 index.
marks = pd.Series(data=[80, 70, 60])

In [4]:
marks

0    80
1    70
2    60
dtype: int64

In [5]:
marks.loc[1]  # Look up one element by its index label (1 in the default integer index)

70

In [6]:
#specifying custom indexes

In [7]:
# Attach a name to every score instead of relying on the default integer index.
marks = pd.Series(
    data=[80, 70, 60],
    index=['ali', 'sara', 'ahmed'],
)

In [8]:
marks

ali      80
sara     70
ahmed    60
dtype: int64

In [9]:
marks.loc['ali']  # Label-based lookup using the custom index

80

In [10]:
# Request a dtype explicitly; without it these integers would be stored as int64.
age = pd.Series(
    data=[80, 70, 60],
    index=['ali', 'sara', 'ahmed'],
    dtype='float',
)

In [11]:
age

ali      80.0
sara     70.0
ahmed    60.0
dtype: float64

In [12]:
age.loc[age.gt(70)]  # Boolean-mask filter: keep only entries strictly greater than 70

ali    80.0
dtype: float64

In [13]:
age.loc[['sara', 'ali']]  # Fancy indexing: a list of labels returns them in the order given

sara    70.0
ali     80.0
dtype: float64

In [14]:
age.loc['sara'] = 18  # Overwrite a single value in place, addressed by its label

In [15]:
#Creating series from dictionary

In [16]:
# Dict keys become the index labels; the None entry is stored as a missing value.
age = pd.Series(data={'ali': 80, 'sara': 70, 'ahmed': None})

In [17]:
age

ali      80.0
sara     70.0
ahmed     NaN
dtype: float64

In [18]:
age.isna()  # Element-wise check for missing values; notna() is the inverse

ali      False
sara     False
ahmed     True
dtype: bool

In [19]:
age.notna()  # True wherever a value is present

ali       True
sara      True
ahmed    False
dtype: bool

In [20]:
age.mean() #Missing values are skipped by default, so only 80 and 70 are averaged

75.0

In [21]:
# A second Series that shares only some labels with `age` ('sara' is absent).
age1 = pd.Series(data=[100, 50], index=['ali', 'ahmed'])

In [22]:
# Show both Series one after the other with a single display call.
display(age1, age)

ali      100
ahmed     50
dtype: int64

ali      80.0
sara     70.0
ahmed     NaN
dtype: float64

In [23]:
# Adding two Series aligns them on index labels; a label that is missing
# (or NaN) on either side yields NaN in the result.
age.add(age1)

ahmed      NaN
ali      180.0
sara       NaN
dtype: float64

In [24]:
#Dataframes: Tabular data
#pd.DataFrame - Create a dataframe

In [25]:
# Each dict key becomes a column name; each list supplies that column's values.
stds = pd.DataFrame(
    data={
        'names': ['Sarah', 'Ahmed', 'Ali'],
        'age': [50, 60, 70],
        'cgpa': [3.4, 2.9, 3.6],
        'address': ['Karachi', 'Lahore', 'Islamabad'],
    }
)

In [26]:
stds

Unnamed: 0,names,age,cgpa,address
0,Sarah,50,3.4,Karachi
1,Ahmed,60,2.9,Lahore
2,Ali,70,3.6,Islamabad


In [27]:
stds.columns #Displaying the columns of dataframe

Index(['names', 'age', 'cgpa', 'address'], dtype='object')

In [28]:
stds['names']

0    Sarah
1    Ahmed
2      Ali
Name: names, dtype: object

In [29]:
stds.loc[:, ['names', 'age', 'cgpa']]  # Fancy indexing: all rows, a chosen list of columns

Unnamed: 0,names,age,cgpa
0,Sarah,50,3.4
1,Ahmed,60,2.9
2,Ali,70,3.6


In [30]:
stds.loc[stds['age'].gt(50)]  # Filtering: keep only the students whose age is above 50

Unnamed: 0,names,age,cgpa,address
1,Ahmed,60,2.9,Lahore
2,Ali,70,3.6,Islamabad


In [31]:
stds['age']  # Same column as the attribute form stds.age, selected by name

0    50
1    60
2    70
Name: age, dtype: int64

In [32]:
# numpy is imported once in the notebook's top import cell, next to pandas.
# np.arange(1, 4) yields 1, 2, 3 -- one semester number per existing row.
stds['semester'] = np.arange(1, 4)

In [33]:
stds[['semester','names']]

Unnamed: 0,semester,names
0,1,Sarah
1,2,Ahmed
2,3,Ali


In [34]:
# Same dict-of-lists construction, this time with explicit row labels.
doctors = pd.DataFrame(
    data={
        'names': ['Asad', 'Rahim'],
        'qualification': ['PhD', 'MS'],
    },
    index=['d1', 'd2'],
)

In [35]:
doctors

Unnamed: 0,names,qualification
d1,Asad,PhD
d2,Rahim,MS


In [36]:
doctors['address']='Karachi' #Adding a new column; the single value is repeated for every row

In [37]:
doctors

Unnamed: 0,names,qualification,address
d1,Asad,PhD,Karachi
d2,Rahim,MS,Karachi


In [38]:
doctors.loc['d1'] #Accessing one row by its index label; the row comes back as a Series
#loc selects by label, iloc selects by integer position

names               Asad
qualification        PhD
address          Karachi
Name: d1, dtype: object

In [39]:
doctors.iloc[0] #Accessing one row by integer position (0 is the first row)

names               Asad
qualification        PhD
address          Karachi
Name: d1, dtype: object

In [40]:
doctors.iloc[1]

names              Rahim
qualification         MS
address          Karachi
Name: d2, dtype: object

In [41]:
# iloc - select by integer position (row number)
# loc  - select by index label

In [42]:
doctors

Unnamed: 0,names,qualification,address
d1,Asad,PhD,Karachi
d2,Rahim,MS,Karachi


In [43]:
del doctors['address'] #Removing a column in place (the doctors frame itself is modified)

In [44]:
doctors

Unnamed: 0,names,qualification
d1,Asad,PhD
d2,Rahim,MS


In [45]:
del doctors['names']

In [46]:
doctors

Unnamed: 0,qualification
d1,PhD
d2,MS


In [47]:
doctors['names']='Test' #Re-creating the column; a new column is appended after the existing ones

In [48]:
doctors

Unnamed: 0,qualification,names
d1,PhD,Test
d2,MS,Test


In [49]:
stds

Unnamed: 0,names,age,cgpa,address,semester
0,Sarah,50,3.4,Karachi,1
1,Ahmed,60,2.9,Lahore,2
2,Ali,70,3.6,Islamabad,3


In [50]:
stds.transpose()  # Swap rows and columns: column names become the row labels

Unnamed: 0,0,1,2
names,Sarah,Ahmed,Ali
age,50,60,70
cgpa,3.4,2.9,3.6
address,Karachi,Lahore,Islamabad
semester,1,2,3


In [51]:
stds.iloc[::2] #slicing in a dataframe: every second row by position, starting at the first

Unnamed: 0,names,age,cgpa,address,semester
0,Sarah,50,3.4,Karachi,1
2,Ali,70,3.6,Islamabad,3


In [52]:
#Negative indexing
# -1: last value, -2: 2nd last value

In [53]:
stds.iloc[-1]

names             Ali
age                70
cgpa              3.6
address     Islamabad
semester            3
Name: 2, dtype: object

In [54]:
# Get a NumPy array from the frame. to_numpy() is the accessor pandas recommends
# over the older .values attribute; mixed column types give an object array.
stds.to_numpy()

array([['Sarah', 50, 3.4, 'Karachi', 1],
       ['Ahmed', 60, 2.9, 'Lahore', 2],
       ['Ali', 70, 3.6, 'Islamabad', 3]], dtype=object)

In [55]:
stds.drop(index=[0])  # Remove specific rows by label; returns a new frame

Unnamed: 0,names,age,cgpa,address,semester
1,Ahmed,60,2.9,Lahore,2
2,Ali,70,3.6,Islamabad,3


In [56]:
#Creating dataframe with different data structures

In [57]:
# Rows mix strings, ints and floats. Without an explicit dtype NumPy promotes
# every element to a string ('50', '3.4', ...); dtype=object keeps each value
# as the Python object it was written as.
arr = np.array(
    [
        ['Sarah', 50, 3.4, 'Karachi', 1],
        ['Ahmed', 60, 2.9, 'Lahore', 2],
        ['Ali', 70, 3.6, 'Islamabad', 3],
        ['Shoaib', 37, 2.9, 'Pindi', 4],
    ],
    dtype=object,
)

In [58]:
# A frame built from a NumPy array of mixed rows does not get numeric column
# dtypes on its own (the values arrive as strings or generic objects), so cast
# the numeric columns explicitly to make arithmetic and comparisons work.
df = pd.DataFrame(
    arr,
    columns=['Name', 'Age', 'CGPA', 'City', 'Semester'],
).astype({'Age': int, 'CGPA': float, 'Semester': int})

In [59]:
df

Unnamed: 0,Name,Age,CGPA,City,Semester
0,Sarah,50,3.4,Karachi,1
1,Ahmed,60,2.9,Lahore,2
2,Ali,70,3.6,Islamabad,3
3,Shoaib,37,2.9,Pindi,4


In [60]:
# A list of row-lists keeps each value's own Python type, so pandas can infer
# a sensible dtype per column.
lst = [
    ['Sarah', 50, 3.4, 'Karachi', 1],
    ['Ahmed', 60, 2.9, 'Lahore', 2],
    ['Ali', 70, 3.6, 'Islamabad', 3],
    ['Shoaib', 37, 2.9, 'Pindi', 4],
]

frame = pd.DataFrame(data=lst, columns=['A', 'B', 'C', 'D', 'E'])

In [61]:
# Exercise: try creating a DataFrame from other kinds of data structures (see slide 16)

In [62]:
#Drop rows/columns

In [63]:
frame

Unnamed: 0,A,B,C,D,E
0,Sarah,50,3.4,Karachi,1
1,Ahmed,60,2.9,Lahore,2
2,Ali,70,3.6,Islamabad,3
3,Shoaib,37,2.9,Pindi,4


In [64]:
frame.drop(columns=['A', 'B'])  # Drop column-wise; returns a new frame, `frame` keeps A and B

Unnamed: 0,C,D,E
0,3.4,Karachi,1
1,2.9,Lahore,2
2,3.6,Islamabad,3
3,2.9,Pindi,4


In [65]:
frame.drop(index=[0, 1])  # Drop row-wise, addressing rows by their index labels

Unnamed: 0,A,B,C,D,E
2,Ali,70,3.6,Islamabad,3
3,Shoaib,37,2.9,Pindi,4


In [66]:
#Slicing in dataframe

In [67]:
frame.iloc[::2] #[start:stop:step] - start and stop left at their defaults, step 2 keeps every second row

Unnamed: 0,A,B,C,D,E
0,Sarah,50,3.4,Karachi,1
2,Ali,70,3.6,Islamabad,3


In [68]:
frame.loc[:,'A':'C'] #All rows; a label slice includes both endpoints, so column 'C' is kept

Unnamed: 0,A,B,C
0,Sarah,50,3.4
1,Ahmed,60,2.9
2,Ali,70,3.6
3,Shoaib,37,2.9


In [69]:
#Apply function

In [70]:
frame.columns = ['Name','Age','CGPA','Address','Semester'] #Rename columns by position: one new label per existing column

In [71]:
def old_young(age, threshold=40):
    """Classify an age as 'Old' or 'Young'.

    Args:
        age: Age to classify; must support ``>`` comparison with ``threshold``.
        threshold: Cut-off age. Ages strictly above it are 'Old'. Defaults to
            40, the value that was previously hard-coded.

    Returns:
        'Old' if ``age > threshold``, otherwise 'Young'.
    """
    return 'Old' if age > threshold else 'Young'

In [72]:
# Run old_young on every age and store the resulting labels in a new column.
frame['Status'] = frame['Age'].map(old_young)

In [73]:
frame

Unnamed: 0,Name,Age,CGPA,Address,Semester,Status
0,Sarah,50,3.4,Karachi,1,Old
1,Ahmed,60,2.9,Lahore,2,Old
2,Ali,70,3.6,Islamabad,3,Old
3,Shoaib,37,2.9,Pindi,4,Young


In [74]:
#Lambda functions: a one-expression anonymous function.
#Note: this rebinds the name old_young, replacing the def-based version with
#a lambda that applies the same rule (above 40 is "Old", otherwise "Young").
old_young = lambda age: "Old" if age>40 else "Young"

In [75]:
frame['Status']=frame['Age'].apply(old_young) #Recompute the column, this time through the lambda bound to old_young

In [76]:
frame

Unnamed: 0,Name,Age,CGPA,Address,Semester,Status
0,Sarah,50,3.4,Karachi,1,Old
1,Ahmed,60,2.9,Lahore,2,Old
2,Ali,70,3.6,Islamabad,3,Old
3,Shoaib,37,2.9,Pindi,4,Young


In [77]:
# Small employee table used for the lambda/apply example that follows.
employee = pd.DataFrame(
    data={
        'salary': [1000, 2000, 3000],
        'name': ['Mark', 'Jordan', 'Yuaan'],
    }
)

In [78]:
employee

Unnamed: 0,salary,name
0,1000,Mark
1,2000,Jordan
2,3000,Yuaan


In [79]:
# Salaries up to and including 1000 get an increment of 50; all others get 25.
employee['Increment'] = employee['salary'].map(
    lambda pay: 50 if pay <= 1000 else 25
)

In [80]:
employee

Unnamed: 0,salary,name,Increment
0,1000,Mark,50
1,2000,Jordan,25
2,3000,Yuaan,25


In [81]:
employee['Increment'].abs()  # Element-wise absolute value; the result is still a Series

0    50
1    25
2    25
Name: Increment, dtype: int64