# Pandas

#### Series

###### Convention for importing pandas

In [3]:
import pandas as pd 
import numpy as np

In [4]:
# Build a Series from a plain Python list; with no index given,
# pandas labels the entries 0, 1, 2, ...
days = pd.Series(data=['Monday', 'Tuesday', 'Wednesday'])
print(days)

0       Monday
1      Tuesday
2    Wednesday
dtype: object


#### Creating a Series from a NumPy array

In [5]:
# A NumPy array works as the data source just like a list does.
days_list = np.array(['Monday', 'Tuesday', 'Wednesday'])
numpy_days = pd.Series(data=days_list)
print(numpy_days)

0       Monday
1      Tuesday
2    Wednesday
dtype: object


#### Using strings as the index

In [6]:
# Attach explicit string labels ('a', 'b', 'c') instead of the default
# integer index.
days = pd.Series(
    data=['Monday', 'Tuesday', 'Wednesday'],
    index=list('abc'),
)

In [7]:
# Show the string-labelled Series.
print(days)

a       Monday
b      Tuesday
c    Wednesday
dtype: object


#### Creating a Series from a dictionary


In [8]:
# Dictionary keys become the index labels and the values become the data.
days1 = pd.Series(dict(a='Monday', b='Tuesday', c='Wednesday'))

In [9]:
# Show the dictionary-built Series; its index is the dictionary's keys.
print(days1)

a       Monday
b      Tuesday
c    Wednesday
dtype: object


In [10]:
# `days` is labelled 'a', 'b', 'c', so an integer in plain brackets is not a
# valid label. Use .iloc for positional access: `days[0]` relies on a
# positional fallback that is deprecated in pandas 2.1 and removed in 3.0.
days.iloc[0]

'Monday'

In [11]:
# Positional slice: everything from the second element onward.
days.iloc[1:]

b      Tuesday
c    Wednesday
dtype: object

In [12]:
# Label-based lookup: fetch the entry whose index label is 'c'.
days['c']

'Wednesday'

## DataFrame

In [14]:
# Column-oriented construction: each key is a column name and each list
# holds that column's values. The rows get the custom labels 2, 4, 6, 8.
df_dict = dict(
    Country=['Ghana', 'Kenya', 'Nigeria', 'Togo'],
    Capital=['Accra', 'Nairobi', 'Abuja', 'Lome'],
    Population=[10000, 8500, 35000, 12000],
    Age=[60, 70, 80, 75],
)
df = pd.DataFrame(data=df_dict, index=[2, 4, 6, 8])

In [15]:
# Plain-text view of the dictionary-built frame.
print(df)

   Country  Capital  Population  Age
2    Ghana    Accra       10000   60
4    Kenya  Nairobi        8500   70
6  Nigeria    Abuja       35000   80
8     Togo     Lome       12000   75


In [16]:
# Row-oriented construction: each inner list is one row, so the column
# names have to be supplied separately.
df_list = [
    ['Ghana', 'Accra', 10000, 60],
    ['Kenya', 'Nairobi', 8500, 70],
    ['Nigeria', 'Abuja', 35000, 80],
    ['Togo', 'Lome', 12000, 75],
]
df1 = pd.DataFrame(
    data=df_list,
    columns=['Country', 'Capital', 'Population', 'Age'],
    index=[2, 4, 6, 8],
)

In [17]:
# Plain-text view of the list-built frame.
print(df1)

   Country  Capital  Population  Age
2    Ghana    Accra       10000   60
4    Kenya  Nairobi        8500   70
6  Nigeria    Abuja       35000   80
8     Togo     Lome       12000   75


#### Select the row at integer position 3

In [18]:
# .iloc is position-based: position 3 is the fourth row, whose label is 8.
df.iloc[3]

Country        Togo
Capital        Lome
Population    12000
Age              75
Name: 8, dtype: object

#### Select the row with index label 6

In [19]:
# .loc is label-based: 6 here is an index label, not a position.
df.loc[6]

Country       Nigeria
Capital         Abuja
Population      35000
Age                80
Name: 6, dtype: object

#### Select the Capital column

In [20]:
# Bracket access with a column name returns that column as a Series.
df['Capital']

2      Accra
4    Nairobi
6      Abuja
8       Lome
Name: Capital, dtype: object

In [21]:
# .at fetches a single cell by row label and column label.
df.at[6, 'Country']

'Nigeria'

In [22]:
# .iat fetches a single cell by row position and column position.
df.iat[2, 0]

'Nigeria'

In [23]:
# Total of the Population column.
df['Population'].sum()

65500

In [24]:
# Mean of each numeric column (Population and Age).
# numeric_only=True is required because the frame also holds the string
# columns Country and Capital: since pandas 2.0 the default is
# numeric_only=False, so a bare df.mean() raises TypeError instead of
# silently skipping them.
df.mean(numeric_only=True)

Population    16375.00
Age              71.25
dtype: float64

In [25]:
# Summary statistics (count, mean, std, min, quartiles, max); as the output
# below shows, only the numeric columns Population and Age are included.
df.describe()

Unnamed: 0,Population,Age
count,4.0,4.0
mean,16375.0,71.25
std,12499.166639,8.539126
min,8500.0,60.0
25%,9625.0,67.5
50%,11000.0,72.5
75%,17750.0,76.25
max,35000.0,80.0


## The missing-data enigma

In [26]:
# Sample records with gaps: np.nan marks a missing value in the Name,
# Experience and Height columns.
df_dict2 = dict(
    Name=['James', 'Yemen', 'Caro', np.nan],
    Profession=['Researcher', 'Artist', 'Doctor', 'Writer'],
    Experience=[12, np.nan, 10, 8],
    Height=[np.nan, 175, 180, 150],
)

In [27]:
# Turn the dictionary of columns into a DataFrame (default integer index).
new_df = pd.DataFrame(data=df_dict2)

In [28]:
# Plain-text view of the frame; missing entries are shown as NaN.
print(new_df)

    Name  Profession  Experience  Height
0  James  Researcher        12.0     NaN
1  Yemen      Artist         NaN   175.0
2   Caro      Doctor        10.0   180.0
3    NaN      Writer         8.0   150.0


#### Flag cells with missing values as True

In [30]:
# Boolean mask with the same shape as new_df: True wherever a value is missing.
new_df.isna()

Unnamed: 0,Name,Profession,Experience,Height
0,False,False,False,True
1,False,False,True,False
2,False,False,False,False
3,True,False,False,False


#### Remove rows with missing values

In [31]:
# Return a copy without any row that has at least one missing value;
# new_df itself is left untouched.
new_df.dropna(axis=0, how='any')

Unnamed: 0,Name,Profession,Experience,Height
2,Caro,Doctor,10.0,180.0
