# Pandas Basics

In [4]:
import pandas as pd
import numpy as np

In [6]:
# Build a 5x4 grid holding the integers 0..19 in row-major order.
np.arange(20).reshape(5, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [10]:
# Wrap a 5x4 integer grid (0..19) in a DataFrame with explicit row and column labels.
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=["Row1", "Row2", "Row3", "Row4", "Row5"],
    columns=["Col1", "Col2", "Col3", "Col4"],
)

In [12]:
# Preview the frame; head() shows at most the first five rows, which is all of them here.
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [17]:
# Confirm that the object is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [19]:
# Summarise the index, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Col1    5 non-null      int32
 1   Col2    5 non-null      int32
 2   Col3    5 non-null      int32
 3   Col4    5 non-null      int32
dtypes: int32(4)
memory usage: 120.0+ bytes


In [26]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,Col1,Col2,Col3,Col4
count,5.0,5.0,5.0,5.0
mean,8.0,9.0,10.0,11.0
std,6.324555,6.324555,6.324555,6.324555
min,0.0,1.0,2.0,3.0
25%,4.0,5.0,6.0,7.0
50%,8.0,9.0,10.0,11.0
75%,12.0,13.0,14.0,15.0
max,16.0,17.0,18.0,19.0


In [28]:
# Selecting a single column with one pair of brackets yields a Series, not a DataFrame.
type(df['Col1'])

pandas.core.series.Series

In [32]:
# Selecting with a list of column names yields a DataFrame restricted to those columns.
df[['Col1','Col2','Col3']]

Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,4,5,6
Row3,8,9,10
Row4,12,13,14
Row5,16,17,18


In [39]:
# Label-based row selection: a list of index labels returns those rows as a DataFrame.
df.loc[['Row2','Row3']]

Unnamed: 0,Col1,Col2,Col3,Col4
Row2,4,5,6,7
Row3,8,9,10,11


In [41]:
# Re-display the full frame for reference before the positional-indexing examples.
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [43]:
# Position-based selection: row positions 2-3 and column positions 0-1 (slice ends are exclusive).
df.iloc[2:4,0:2]

Unnamed: 0,Col1,Col2
Row3,8,9
Row4,12,13


In [45]:
# Everything except the first row and the first column.
df.iloc[1:,1:]

Unnamed: 0,Col2,Col3,Col4
Row2,5,6,7
Row3,9,10,11
Row4,13,14,15
Row5,17,18,19


In [47]:
# Re-display the full frame for reference.
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [61]:
# Pick the first and last columns by position, keeping every row.
df.iloc[:, [0, -1]]

Unnamed: 0,Col1,Col4
Row1,0,3
Row2,4,7
Row3,8,11
Row4,12,15
Row5,16,19


In [63]:
# Re-display the full frame for reference.
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [65]:
# Drop the first column and export the remaining columns as a NumPy array.
# to_numpy() is the accessor the pandas documentation recommends over .values;
# for this all-integer frame both produce the same array.
df.iloc[:, 1:].to_numpy()

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

In [72]:
# Count missing values per column (none in this frame).
df.isnull().sum()

Col1    0
Col2    0
Col3    0
Col4    0
dtype: int64

In [82]:
# Rebind df to a small 2x3 frame with one missing value (Row2 / Col2)
# for the null-handling examples that follow.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, np.nan, 6]],
    index=["Row1", "Row2"],
    columns=["Col1", "Col2", "Col3"],
)

In [84]:
# Preview the new frame; Col2 is displayed as float because it holds a missing value.
df.head()

Unnamed: 0,Col1,Col2,Col3
Row1,1,2.0,3
Row2,4,,6


In [86]:
# A bare variable as the last expression of a cell displays the whole frame.
df

Unnamed: 0,Col1,Col2,Col3
Row1,1,2.0,3
Row2,4,,6


In [92]:
# Count missing values per column; Col2 now reports one.
df.isnull().sum()

Col1    0
Col2    1
Col3    0
dtype: int64

In [96]:
# Boolean Series: True for every column that has no missing values.
df.isnull().sum()==0

Col1     True
Col2    False
Col3     True
dtype: bool

In [98]:
# Frequency of each distinct value in Col3.
df['Col3'].value_counts()

Col3
3    1
6    1
Name: count, dtype: int64

In [100]:
# Display the frame again before filtering it.
df

Unnamed: 0,Col1,Col2,Col3
Row1,1,2.0,3
Row2,4,,6


In [106]:
# Boolean-mask filter: keep the rows where Col3 is greater than 2 (both rows qualify here).
df[df['Col3']>2]

Unnamed: 0,Col1,Col2,Col3
Row1,1,2.0,3
Row2,4,,6


## Pandas with CSV files

In [127]:
from io import StringIO

In [133]:
# Inline CSV text standing in for a file on disk.
# The header must not contain a space after the comma: read_csv keeps leading
# whitespace in header fields by default, so 'col2, col3' would name the third
# column ' col3' and selecting 'col3' by name would fail.
data = ('col1,col2,col3\n'
        'x,y,z\n'
        'a,b,2\n'
        'c,d,3')

In [135]:
# The CSV payload is an ordinary Python string.
type(data)

str

In [137]:
# Wrap the string in an in-memory text buffer so it can be read like a file.
StringIO(data)

<_io.StringIO at 0x14f58fc9a80>

In [139]:
# read_csv accepts the file-like buffer; the first line of the text becomes the header.
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,x,y,z
1,a,b,2
2,c,d,3


In [141]:
# usecols limits the result to the named columns.
pd.read_csv(StringIO(data),usecols = ['col1','col2'])

Unnamed: 0,col1,col2
0,x,y
1,a,b
2,c,d


In [143]:
# CSV text whose final record stops after three fields, so column d has no
# value on that row.
data = (
    'a,b,c,d\n'
    '1,2,3,4\n'
    '5,6,7,8\n'
    '9,10,11'
)

In [147]:
# Parse the CSV text into a DataFrame, letting pandas infer the column dtypes.
df=pd.read_csv(StringIO(data))

In [149]:
# Inspect the inferred dtypes: a, b and c are integers, while d is float64 with one missing value.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   a       3 non-null      int64  
 1   b       3 non-null      int64  
 2   c       3 non-null      int64  
 3   d       2 non-null      float64
dtypes: float64(1), int64(3)
memory usage: 228.0 bytes


In [154]:
# Preview the parsed rows; the last row has no value for d.
df.head()

Unnamed: 0,a,b,c,d
0,1,2,3,4.0
1,5,6,7,8.0
2,9,10,11,


In [156]:
# Count missing values per column; only d has one.
df.isnull().sum()

a    0
b    0
c    0
d    1
dtype: int64

In [164]:
# Re-parse with explicit dtypes for the integer columns.
# The builtin `int` maps to a platform-dependent NumPy integer width (32-bit on
# some platforms, 64-bit on others), so the width is spelled out to match the
# int32 dtypes shown in the recorded output regardless of where this runs.
df = pd.read_csv(StringIO(data), dtype={'a': 'int32', 'b': 'int32', 'c': 'int32'})

In [166]:
# Display the frame parsed with the explicit dtypes.
df

Unnamed: 0,a,b,c,d
0,1,2,3,4.0
1,5,6,7,8.0
2,9,10,11,


In [168]:
# Confirm that the dtype override took effect for a, b and c.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   a       3 non-null      int32  
 1   b       3 non-null      int32  
 2   c       3 non-null      int32  
 3   d       2 non-null      float64
dtypes: float64(1), int32(3)
memory usage: 192.0 bytes


In [172]:
# Column dtypes as a Series indexed by column name.
df.dtypes

a      int32
b      int32
c      int32
d    float64
dtype: object

In [176]:
# Use the first column (a) as the row index instead of the default RangeIndex.
pd.read_csv(StringIO(data),index_col=0)

Unnamed: 0_level_0,b,c,d
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,3,4.0
5,6,7,8.0
9,10,11,


## Pandas with HTML

In [9]:
import pandas as pd
# read_html fetches the page (network access required) and returns a list of
# DataFrames parsed from the HTML tables it finds.
html = pd.read_html('https://en.wikipedia.org/wiki/Mobile_country_code')

In [10]:
# read_html returns a list, not a single DataFrame.
type(html)

list

In [13]:
# Display the whole list of parsed tables.
# NOTE(review): this prints every table on the page; len(html) or indexing a
# single table would keep the output short.
html

[   MCC  MNC Brand      Operator       Status Bands (MHz)  \
 0    1    1  TEST  Test network  Operational         any   
 1    1    1  TEST  Test network  Operational         any   
 2  999   99   NaN  Internal use  Operational         any   
 3  999  999   NaN  Internal use  Operational         any   
 
                               References and notes  
 0                                              NaN  
 1                                              NaN  
 2  Internal use in private networks, no roaming[6]  
 3  Internal use in private networks, no roaming[6]  ,
      Mobile country code                                    Country ISO 3166  \
 0                    289                                 A Abkhazia    GE-AB   
 1                    412                                Afghanistan       AF   
 2                    276                                    Albania       AL   
 3                    603                                    Algeria       DZ   
 4               

In [16]:
# First table parsed from the page.
html[0]

Unnamed: 0,MCC,MNC,Brand,Operator,Status,Bands (MHz),References and notes
0,1,1,TEST,Test network,Operational,any,
1,1,1,TEST,Test network,Operational,any,
2,999,99,,Internal use,Operational,any,"Internal use in private networks, no roaming[6]"
3,999,999,,Internal use,Operational,any,"Internal use in private networks, no roaming[6]"


In [18]:
# Second table parsed from the page: the per-country list of mobile country codes.
html[1]

Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,
...,...,...,...,...,...,...
247,452,Vietnam,VN,List of mobile network codes in the Vietnam,,
248,543,W Wallis and Futuna,WF,List of mobile network codes in Wallis and Futuna,,
249,421,Y Yemen,YE,List of mobile network codes in the Yemen,,
250,645,Z Zambia,ZM,List of mobile network codes in Zambia,,


In [26]:
# Rebind html to the tables from a different page; match= restricts the result
# to tables containing text that matches the given string.
html = pd.read_html('https://en.wikipedia.org/wiki/Economy_of_the_United_States',match = "Government debt")

In [28]:
# Display the list of matching tables.
html

[                                                    0  \
 0   New York City, the world's principal fintech a...   
 1                                            Currency   
 2                                         Fiscal year   
 3                                 Trade organizations   
 4                                       Country group   
 5                                          Statistics   
 6                                          Population   
 7                                                 GDP   
 8                                            GDP rank   
 9                                          GDP growth   
 10                                     GDP per capita   
 11                                GDP per capita rank   
 12                                      GDP by sector   
 13                                   GDP by component   
 14                                    Inflation (CPI)   
 15                      Population below poverty line   
 16           

In [30]:
# First matching table.
html[0]

Unnamed: 0,0,1
0,"New York City, the world's principal fintech a...","New York City, the world's principal fintech a..."
1,Currency,United States dollar (USD) US Dollar Index
2,Fiscal year,October 1 – September 30
3,Trade organizations,"WTO, G-20, G7, OECD, USMCA, APEC and others"
4,Country group,Advanced economy[4] High-income economy[5] Div...
5,Statistics,Statistics
6,Population,"340,332,281 (August 30, 2023)[9]"
7,GDP,$28.78 trillion (nominal; 2024)[10] $28.78 tr...
8,GDP rank,1st (nominal; 2024) 2nd (PPP; 2024)
9,GDP growth,2.1% (2022)[11] 2.5% (2023)[11] 2.7% (2024)[11]
