In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from datetime import datetime

### Missing data

In [2]:
df = pd.DataFrame(data={'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]})

In [3]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [4]:
df.dropna(axis=0)

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [5]:
df.dropna(axis=1)

Unnamed: 0,C
0,1
1,2
2,3


In [6]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [7]:
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [8]:
df.dropna(axis=0)

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [9]:
df.mean()

A    1.5
B    5.0
C    2.0
dtype: float64

In [10]:
df.fillna(df.mean())

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,5.0,2
2,1.5,5.0,3


### Group-by

In [11]:
# Create dataframe
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}

In [12]:
df = pd.DataFrame(data=data)

In [13]:
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [14]:
df.groupby('Company').max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,Sarah,350
GOOG,Sam,200
MSFT,Vanessa,340


In [15]:
df.groupby('Company').mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [16]:
df.groupby('Company').describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [17]:
df.groupby('Company').describe().transpose()

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


### Dataframe Operations

In [18]:
df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})
df.head()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [19]:
df['col2'].unique()

array([444, 555, 666], dtype=int64)

In [20]:
df['col2'].nunique()

3

In [21]:
df['col2'].value_counts

<bound method IndexOpsMixin.value_counts of 0    444
1    555
2    666
3    444
Name: col2, dtype: int64>

In [22]:
df['col2'].value_counts()

444    2
555    1
666    1
Name: col2, dtype: int64

In [23]:
df
[(df['col1']>2) & (df['col2'] == 444)]

[0    False
 1    False
 2    False
 3     True
 dtype: bool]

In [24]:
def times2 (num):
    return (num*2)

In [25]:
df.apply(times2)

Unnamed: 0,col1,col2,col3
0,2,888,abcabc
1,4,1110,defdef
2,6,1332,ghighi
3,8,888,xyzxyz


In [26]:
df['new'] = df['col1'].apply(times2)

In [27]:
df

Unnamed: 0,col1,col2,col3,new
0,1,444,abc,2
1,2,555,def,4
2,3,666,ghi,6
3,4,444,xyz,8


In [28]:
del df['new']

In [29]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [30]:
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

In [31]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [32]:
df['col2'].sort_values()

0    444
3    444
1    555
2    666
Name: col2, dtype: int64

In [33]:
df['col2'].sort_values(ascending=False)

2    666
1    555
3    444
0    444
Name: col2, dtype: int64

### Data input and output

In [34]:
pwd

'H:\\python\\tsa\\UDEMY_TSA_FINAL\\02-Pandas'

In [35]:
pd.read_csv('example.csv')

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [36]:
df = pd.read_csv('H://python//tsa//UDEMY_TSA_FINAL//02-Pandas/example.csv')

In [37]:
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [38]:
df.to_csv('new_example.csv', index=False)

In [39]:
pd.read_excel('Excel_Sample.xlsx',sheet_name='Sheet1')

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [40]:
my_list_tables = pd.read_html('https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/')

In [41]:
my_list_tables

[                                         Bank NameBank            CityCity  \
 0                                    Almena State Bank              Almena   
 1                           First City Bank of Florida   Fort Walton Beach   
 2                                 The First State Bank       Barboursville   
 3                                   Ericson State Bank             Ericson   
 4                     City National Bank of New Jersey              Newark   
 5                                        Resolute Bank              Maumee   
 6                                Louisa Community Bank              Louisa   
 7                                 The Enloe State Bank              Cooper   
 8                  Washington Federal Bank for Savings             Chicago   
 9      The Farmers and Merchants State Bank of Argonia             Argonia   
 10                                 Fayette County Bank          Saint Elmo   
 11   Guaranty Bank, (d/b/a BestBank in Georgia & Mi

In [42]:
type(my_list_tables)

list

In [43]:
len(my_list_tables)

1

In [44]:
my_list_tables[0]

Unnamed: 0,Bank NameBank,CityCity,StateSt,CertCert,Acquiring InstitutionAI,Closing DateClosing,FundFund
0,Almena State Bank,Almena,KS,15426,Equity Bank,"October 23, 2020",10538
1,First City Bank of Florida,Fort Walton Beach,FL,16748,"United Fidelity Bank, fsb","October 16, 2020",10537
2,The First State Bank,Barboursville,WV,14361,"MVB Bank, Inc.","April 3, 2020",10536
3,Ericson State Bank,Ericson,NE,18265,Farmers and Merchants Bank,"February 14, 2020",10535
4,City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,"November 1, 2019",10534
5,Resolute Bank,Maumee,OH,58317,Buckeye State Bank,"October 25, 2019",10533
6,Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,"October 25, 2019",10532
7,The Enloe State Bank,Cooper,TX,10716,"Legend Bank, N. A.","May 31, 2019",10531
8,Washington Federal Bank for Savings,Chicago,IL,30570,Royal Savings Bank,"December 15, 2017",10530
9,The Farmers and Merchants State Bank of Argonia,Argonia,KS,17719,Conway Bank,"October 13, 2017",10529
