#Reading Data Files of different formats and loading them for analysis

In [None]:
## Reading from json file format and loading it into dataframe

In [5]:
import pandas as pd
from io import StringIO
Data = '{"employee_name":"James","email":"james@gmail.com","job profile":[{"title1":"Team Lead"}]}'
df = pd.read_json(StringIO(Data))
df

Unnamed: 0,employee_name,email,job profile
0,James,james@gmail.com,{'title1': 'Team Lead'}


In [None]:
## convert back to json

In [7]:
df.to_json()

'{"employee_name":{"0":"James"},"email":{"0":"james@gmail.com"},"job profile":{"0":{"title1":"Team Lead"}}}'

In [8]:
df.to_json(orient='index')

'{"0":{"employee_name":"James","email":"james@gmail.com","job profile":{"title1":"Team Lead"}}}'

In [9]:
df.to_json(orient='records')

'[{"employee_name":"James","email":"james@gmail.com","job profile":{"title1":"Team Lead"}}]'

In [None]:
## read csv files

In [10]:
df = pd.read_csv("data.csv",header=None)
df

Unnamed: 0,0,1,2,3,4,5
0,Date,Category,Value,Product,Sales,Region
1,2023-01-01,A,28.0,Product1,754.0,East
2,2023-01-02,B,39.0,Product3,110.0,North
3,2023-01-03,C,32.0,Product2,398.0,East
4,2023-01-04,B,8.0,Product1,522.0,East
5,2023-01-05,B,26.0,Product3,869.0,North
6,2023-01-06,B,54.0,Product3,192.0,West
7,2023-01-07,A,16.0,Product1,936.0,East
8,2023-01-08,C,89.0,Product1,488.0,West
9,2023-01-09,C,37.0,Product3,772.0,West


In [11]:
df.loc[0]

Unnamed: 0,0
0,Date
1,Category
2,Value
3,Product
4,Sales
5,Region


In [13]:
print(type(df.iloc[0,4]))

<class 'str'>


In [19]:
print(df.dtypes)

0    object
1    object
2    object
3    object
4    object
5    object
dtype: object


In [20]:
## to csv
df.to_csv("Final.csv")

In [21]:
!pip install lxml



In [27]:
## Read html files
url = "https://www.fdic.gov/bank-failures/failed-bank-list?combine=&items_per_page=All"
df = pd.read_html(url)

In [None]:
## The above code returns a list of dataframes

In [28]:
df

[                                 Bank Name          City          State  \
 0                     Pulaski Savings Bank       Chicago       Illinois   
 1       The First National Bank of Lindsay       Lindsay       Oklahoma   
 2    Republic First Bank dba Republic Bank  Philadelphia   Pennsylvania   
 3                            Citizens Bank      Sac City           Iowa   
 4                 Heartland Tri-State Bank       Elkhart         Kansas   
 ..                                     ...           ...            ...   
 566                 Sinclair National Bank      Gravette       Arkansas   
 567                    Malta National Bank         Malta           Ohio   
 568        First Alliance Bank & Trust Co.    Manchester  New Hampshire   
 569  The National State Bank of Metropolis    Metropolis       Illinois   
 570                       Bank of Honolulu      Honolulu         Hawaii   
 
       Cert                Acquiring Institution       Closing Date  \
 0    28611    

In [29]:
df[0]

Unnamed: 0,Bank Name,City,State,Cert,Acquiring Institution,Closing Date,Fund Sort ascending
0,Pulaski Savings Bank,Chicago,Illinois,28611,Millennium Bank,"January 17, 2025",10548
1,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
2,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
3,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
4,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
...,...,...,...,...,...,...,...
566,Sinclair National Bank,Gravette,Arkansas,34248,Delta Trust & Bank,"September 7, 2001",4649
567,Malta National Bank,Malta,Ohio,6629,North Valley Bank,"May 3, 2001",4648
568,First Alliance Bank & Trust Co.,Manchester,New Hampshire,34264,Southern New Hampshire Bank & Trust,"February 2, 2001",4647
569,The National State Bank of Metropolis,Metropolis,Illinois,3815,Banterra Bank of Marion,"December 14, 2000",4646


In [None]:
## In the above example, I was taking the entire table to be shown on page 1 so that the url remains same now what if I had to scrap this tables on different pages

In [30]:
df_list = []
for i in range(12):
  url = pd.read_html("https://www.fdic.gov/bank-failures/failed-bank-list?combine=&items_per_page=50&page=%d" %i)
  df_list.append(url[0])
df = pd.concat(df_list)
df

Unnamed: 0,Bank Name,City,State,Cert,Acquiring Institution,Closing Date,Fund Sort ascending
0,Pulaski Savings Bank,Chicago,Illinois,28611,Millennium Bank,"January 17, 2025",10548
1,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
2,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
3,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
4,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
...,...,...,...,...,...,...,...
16,Sinclair National Bank,Gravette,Arkansas,34248,Delta Trust & Bank,"September 7, 2001",4649
17,Malta National Bank,Malta,Ohio,6629,North Valley Bank,"May 3, 2001",4648
18,First Alliance Bank & Trust Co.,Manchester,New Hampshire,34264,Southern New Hampshire Bank & Trust,"February 2, 2001",4647
19,The National State Bank of Metropolis,Metropolis,Illinois,3815,Banterra Bank of Marion,"December 14, 2000",4646


In [31]:
## Based on keywords filtering out which table to pick
url = "https://en.wikipedia.org/wiki/Mobile_country_code"
df = pd.read_html(url,match='Country',header=None)[0]
df

Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,
...,...,...,...,...,...,...
247,452,Vietnam,VN,List of mobile network codes in the Vietnam,,
248,543,W Wallis and Futuna,WF,List of mobile network codes in Wallis and Futuna,,
249,421,Y Yemen,YE,List of mobile network codes in the Yemen,,
250,645,Z Zambia,ZM,List of mobile network codes in Zambia,,


### Read excel

In [None]:
data = pd.read_excel('Data.xlsx',sheet_name="")
data

In [None]:
## Convert to pickle files
data.to_pickle('data')

In [None]:
pd.read_pickle('data')