## Pandas

#### * Data Analysis Library
#### * Series =  Single Dimension
#### * DataFrame = Multidimensional

In [5]:
import pandas as pd

In [6]:
s1 = pd.Series([1,2,3,4,5])

In [7]:
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [8]:
type(s1)

pandas.core.series.Series

In [9]:
s1.index

RangeIndex(start=0, stop=5, step=1)

In [10]:
s1.values

array([1, 2, 3, 4, 5], dtype=int64)

In [11]:
s2 = pd.Series([11,22,33,44,55], index=["a","b","c","d","e"])

In [12]:
s2

a    11
b    22
c    33
d    44
e    55
dtype: int64

In [13]:
s1 + s2

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
dtype: float64

In [14]:
s3 = pd.Series(range(10))

In [15]:
s3

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [16]:
s4 = pd.Series(range(10,20))

In [17]:
s4

0    10
1    11
2    12
3    13
4    14
5    15
6    16
7    17
8    18
9    19
dtype: int64

In [18]:
s3 + s4

0    10
1    12
2    14
3    16
4    18
5    20
6    22
7    24
8    26
9    28
dtype: int64

In [19]:
import numpy as np

In [20]:
arr1 = np.arange(5)

In [21]:
arr1

array([0, 1, 2, 3, 4])

In [22]:
arr2 = np.arange(7)

In [23]:
arr2

array([0, 1, 2, 3, 4, 5, 6])

In [24]:
arr1 + arr2
# Returns error because both arrays are of different shapes

ValueError: operands could not be broadcast together with shapes (5,) (7,) 

In [None]:
s2[1:4]

b    22
c    33
d    44
dtype: int64

### Lambda Functions : Anonymous Functions (IIF)

In [None]:
new = pd.Series([12,23,44,56,43,57,], index=["Abdul","Farhan","Asad","Hamza","Subhan","Abdullah"])

new

Abdul       12
Farhan      23
Asad        44
Hamza       56
Subhan      43
Abdullah    57
dtype: int64

In [None]:
def myfunc(marks):
    return marks/100

In [None]:
new = new.apply(myfunc)

In [None]:
new = new.apply(lambda x: np.where(x>40, "Pass","Fail"))

In [None]:
new

Abdul       Fail
Farhan      Fail
Asad        Fail
Hamza       Fail
Subhan      Fail
Abdullah    Fail
dtype: object

In [None]:
a = np.linspace(1,10,11)

In [None]:
a

array([ 1. ,  1.9,  2.8,  3.7,  4.6,  5.5,  6.4,  7.3,  8.2,  9.1, 10. ])

In [None]:
new.count()

6

In [None]:
new.value_counts()

Fail    6
Name: count, dtype: int64

In [None]:
s7 = ([11,2,3,np.nan,np.nan,6])

In [None]:
s7

[11, 2, 3, nan, nan, 6]

### Pandas Data Frame

In [None]:
students = {"roll_no":[1,2,3,4,5],
            "names":["Abdul","Asad","Farhan","Hamza","Ali"],
            "courses": ["Python","Numpy","Pandas","Numpy","Pandas"],
            "mode": ["Online","Onsite","Online","Online","Onsite"] }

In [25]:
students

{'roll_no': [1, 2, 3, 4, 5],
 'names': ['Abdul', 'Asad', 'Farhan', 'Hamza', 'Ali'],
 'courses': ['Python', 'Numpy', 'Pandas', 'Numpy', 'Pandas'],
 'mode': ['Online', 'Onsite', 'Online', 'Online', 'Onsite']}

In [26]:
df = pd.DataFrame(students)

In [27]:
df

Unnamed: 0,roll_no,names,courses,mode
0,1,Abdul,Python,Online
1,2,Asad,Numpy,Onsite
2,3,Farhan,Pandas,Online
3,4,Hamza,Numpy,Online
4,5,Ali,Pandas,Onsite


#### Accessing Data 
##### Columns

In [29]:
df[['courses','names']]

Unnamed: 0,courses,names
0,Python,Abdul
1,Numpy,Asad
2,Pandas,Farhan
3,Numpy,Hamza
4,Pandas,Ali


In [30]:
df['courses']

0    Python
1     Numpy
2    Pandas
3     Numpy
4    Pandas
Name: courses, dtype: object

In [31]:
df['country'] = "Pakistan" 

In [32]:
df

Unnamed: 0,roll_no,names,courses,mode,country
0,1,Abdul,Python,Online,Pakistan
1,2,Asad,Numpy,Onsite,Pakistan
2,3,Farhan,Pandas,Online,Pakistan
3,4,Hamza,Numpy,Online,Pakistan
4,5,Ali,Pandas,Onsite,Pakistan


In [33]:
df['city'] = ["Fsd", "Lhr","Lhr","khi","Isb"]

In [34]:
df

Unnamed: 0,roll_no,names,courses,mode,country,city
0,1,Abdul,Python,Online,Pakistan,Fsd
1,2,Asad,Numpy,Onsite,Pakistan,Lhr
2,3,Farhan,Pandas,Online,Pakistan,Lhr
3,4,Hamza,Numpy,Online,Pakistan,khi
4,5,Ali,Pandas,Onsite,Pakistan,Isb


In [35]:
df["Age"] = [14,35,24,43,65]

In [36]:
df

Unnamed: 0,roll_no,names,courses,mode,country,city,Age
0,1,Abdul,Python,Online,Pakistan,Fsd,14
1,2,Asad,Numpy,Onsite,Pakistan,Lhr,35
2,3,Farhan,Pandas,Online,Pakistan,Lhr,24
3,4,Hamza,Numpy,Online,Pakistan,khi,43
4,5,Ali,Pandas,Onsite,Pakistan,Isb,65


In [37]:
df["Status"] = ["Pass" if age > 25 else "Fail" for age in df["Age"]]

In [38]:
df

Unnamed: 0,roll_no,names,courses,mode,country,city,Age,Status
0,1,Abdul,Python,Online,Pakistan,Fsd,14,Fail
1,2,Asad,Numpy,Onsite,Pakistan,Lhr,35,Pass
2,3,Farhan,Pandas,Online,Pakistan,Lhr,24,Fail
3,4,Hamza,Numpy,Online,Pakistan,khi,43,Pass
4,5,Ali,Pandas,Onsite,Pakistan,Isb,65,Pass
