### Reading Data From Different Sources


In [10]:
import pandas as pd
from io import StringIO
# Inline JSON document: two scalar fields plus a one-element list holding a nested object.
Data = (
    '{"employee_name": "James", '
    '"email": "james@gmail.com", '
    '"job_profile": [{"title1":"Team Lead", "title2":"Sr. Developer"}]}'
)

# Wrap the literal in StringIO so read_json receives a file-like object rather than a raw string.
df = pd.read_json(StringIO(Data))

In [8]:
# Display the parsed frame: a single row, with the nested job_profile object kept in one cell.
df

Unnamed: 0,employee_name,email,job_profile
0,James,james@gmail.com,"{'title1': 'Team Lead', 'title2': 'Sr. Develop..."


In [6]:
# Default layout: {column -> {index label -> value}}.
df.to_json()

'{"employee_name":{"0":"James"},"email":{"0":"james@gmail.com"},"job_profile":{"0":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [7]:
# orient='index' layout: {index label -> {column -> value}}.
df.to_json(orient='index')

'{"0":{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [8]:
# orient='records' layout: a JSON array with one {column -> value} object per row;
# the index labels are not included.
df.to_json(orient='records')

'[{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}]'

In [11]:
# header=None: treat the first line of the file as data, so the columns get integer labels.
WINE_DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
df = pd.read_csv(WINE_DATA_URL, header=None)

In [10]:
# Preview the first five rows of the downloaded data.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [12]:
# Save a local copy of the downloaded data. The row index is written as well
# (index=True is the to_csv default), so the file gains an extra unnamed first column.
df.to_csv("online_data.csv")

In [None]:
# Importing necessary libraries
# html5lib is a library for parsing HTML documents
# beautifulsoup4 is a library for parsing HTML and XML documents
# lxml is used for parsing XML and HTML documents

# diff between lxml and beautifulsoup4:
# lxml is a library that provides a way to parse XML and HTML documents, while BeautifulSoup is a library that provides a way to navigate and search through the parse tree created by lxml or other parsers.
# lxml is faster and more efficient for parsing large documents, while BeautifulSoup is more user-friendly and easier to use for smaller documents.
# beautifulsoup4 is a library that provides a way to navigate and search through the parse tree created by lxml or other parsers.

!pip install lxml
!pip install html5lib
!pip install beautifulsoup4



In [18]:
url="https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/"

# NOTE: read_html returns a list of DataFrames (one per table parsed from the page),
# so despite its name `df` is a list here and must be indexed, e.g. df[0].
df = pd.read_html(url)

In [21]:
# Take the first table returned by read_html and preview its first five rows.
df[0].head()

Unnamed: 0,Bank Name,City,State,Cert,Acquiring Institution,Closing Date,Fund Sort ascending
0,The Santa Anna National Bank,Santa Anna,Texas,5520,Coleman County State Bank,"June 27, 2025",10549
1,Pulaski Savings Bank,Chicago,Illinois,28611,Millennium Bank,"January 17, 2025",10548
2,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
3,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
4,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545


In [27]:
# match="Country" keeps only tables whose text matches that pattern;
# header=0 takes the column names from the first row of each table.
url = "https://en.wikipedia.org/wiki/Mobile_country_code"
country_tables = pd.read_html(url, match="Country", header=0)
country_tables[0].head()

Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,


In [None]:
# openpyxl is a library to read/write Excel 2010 xlsx/xlsm/xltx/xltm files
# It is used to read and write Excel files in Python.
# It is a dependency for pandas to read/write Excel files.
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   ---------------------------------------- 0/2 [et-xmlfile]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [ope

In [44]:
# Load data.xlsx (path relative to the working directory) into a DataFrame, then display it.
df_excel=pd.read_excel('data.xlsx')
df_excel


Unnamed: 0,Name,Age
0,ms,32
1,Jack,34
2,John,31


In [None]:
# pickle is a Python module that serializes and de-serializes Python objects.
# Serialization is the process of converting a Python object into a byte stream.
#
# DataFrame.to_pickle serializes the DataFrame and saves it to a file in pickle format.
# NOTE: the file written here is named 'df_excel' with no extension; a .pkl suffix is the
# usual convention, but it is not added automatically.

df_excel.to_pickle('df_excel')

In [None]:
# De-serialization is the process of converting a byte stream back into a Python object.
#
# pd.read_pickle de-serializes a DataFrame from a file in pickle format; here it loads the
# extension-less 'df_excel' file and displays the result.
# NOTE: only unpickle files from trusted sources - loading a pickle can execute arbitrary code.

pd.read_pickle('df_excel')

Unnamed: 0,Name,Age
0,ms,32
1,Jack,34
2,John,31
