In [20]:
import pandas as pd

In [22]:
from io import StringIO  # in-memory text buffer that can be read like an open file

# One employee record as a JSON document, kept as a single Python string.
# The two adjacent literals are concatenated by the parser into one value.
Data = (
    '{"employee_name": "James", "email": "james@gmail.com", "job_profile":'
    '     [{"title1":"Team Lead", "title2":"Sr. Developer"}]}'
)

# Wrap the string in a file-like object and let pandas parse it into a DataFrame.
json_buffer = StringIO(Data)
df = pd.read_json(json_buffer)

* pd.read_json() reads JSON data into a DataFrame, and it can handle:<br>
    JSON strings directly (deprecated since pandas 2.1 — wrap the string in StringIO instead, as done above).<br>
    File paths and file-like objects (e.g., open files, StringIO).<br>
    URLs (e.g., web API endpoints returning JSON).<br>
    **Note:** StringIO wraps a string as a file-like object.

In [23]:
df  # show the DataFrame built from the JSON string

Unnamed: 0,employee_name,email,job_profile
0,James,james@gmail.com,"{'title1': 'Team Lead', 'title2': 'Sr. Develop..."


In [None]:
df.to_json() # default layout: each column name maps to an {index label: value} dict

'{"employee_name":{"0":"James"},"email":{"0":"james@gmail.com"},"job_profile":{"0":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [None]:
df.to_json(orient='index') # each index label maps to a {column name: value} dict for that row

'{"0":{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [None]:
df.to_json(orient='records') # list with one {column name: value} dict per row; index labels are not included

'[{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}]'

In [59]:
# UCI wine data set. header=None tells pandas not to use the first line as column
# names, so the columns are labelled 0..13 and the first line stays a data row.
wine_data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
df = pd.read_csv(wine_data_url, header=None)
df.head()  # preview the first five rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


When header=None is passed to pd.read_csv(), pandas will:<br>
    Assign default integer column labels (0, 1, 2, etc.) to the columns, instead of using the first row of the CSV as column names.<br>
    Treat all rows as data, including the first one, which would normally be used as column names.

In [60]:
# Save the frame as wine.csv (relative path). No index/header arguments are passed,
# so pandas' defaults apply: the row index and the column labels are written as well.
df.to_csv('wine.csv')

In [61]:
# FDIC page that publishes the failed-bank list as an HTML table.
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/"
# read_html returns a list of DataFrames (one per table it parses), so despite its
# name `df` is a list here and has to be indexed to get a single table.
df = pd.read_html(url)

In [62]:
df[0]  # first table in the list returned by read_html

Unnamed: 0,Bank Name,City,State,Cert,Aquiring Institution,Closing Date,Fund Sort ascending
0,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
1,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
2,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
3,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
4,First Republic Bank,San Francisco,California,59017,"JPMorgan Chase Bank, N.A.","May 1, 2023",10543
5,Signature Bank,New York,New York,57053,"Flagstar Bank, N.A.","March 12, 2023",10540
6,Silicon Valley Bank,Santa Clara,California,24735,First Citizens Bank & Trust Company,"March 10, 2023",10539
7,Almena State Bank,Almena,Kansas,15426,Equity Bank,"October 23, 2020",10538
8,First City Bank of Florida,Fort Walton Beach,Florida,16748,"United Fidelity Bank, fsb","October 16, 2020",10537
9,The First State Bank,Barboursville,West Virginia,14361,"MVB Bank, Inc.","April 3, 2020",10536


In [79]:
# Wikipedia article that lists mobile country codes.
url = 'https://en.wikipedia.org/wiki/Mobile_country_code'

# match='Country' keeps only the tables whose text matches that pattern.
# header=None does not request any particular header row; as the output of the
# next cell shows, the table's own column titles are still picked up.
df = pd.read_html(
    url,
    match='Country',
    header=None,
)

In [71]:
df[0]  # first table in the list returned by read_html

Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,
...,...,...,...,...,...,...
247,452,Vietnam,VN,List of mobile network codes in the Vietnam,,
248,543,W Wallis and Futuna,WF,List of mobile network codes in Wallis and Futuna,,
249,421,Y Yemen,YE,List of mobile network codes in the Yemen,,
250,645,Z Zambia,ZM,List of mobile network codes in Zambia,,


In [78]:
# Load sample_data.xlsx without using any row as the header, so the columns get
# integer labels. In the output below the sheet's real column titles sit in row 4,
# underneath several title rows.
# NOTE(review): the rendered output contains students' e-mail addresses and phone
# numbers — clear or redact the output before sharing this notebook.
df_excel = pd.read_excel('sample_data.xlsx', header=None)
df_excel

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,"GAIL UTKARSH SUPER 100, KANPUR (ENGINEERING)",,,,,,,,,,...,,,,,,,,,,
1,SESSION -2023-2024,,,,,,,,,,...,,,,,,,,,,
2,PROJECT MANAGER NAME: MR. DEVENDRA KUMAR SHARMA,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,FINAL ADMISSION,,,
4,SL NO,SPONSOR,CENTRE,ROLL NO.,STUDENTS NAME,GENDER,CAT,Email-Id,Mobile No. 1,Mobile No. 2,...,CHEMISTRY,MATHS,TOTAL,AIR,CAT RANK,Q/NQ,COLLEGE TYPE (IIT/NIT/IIIT/OTHER GOVT ENG COLLEG),COLLEGE NAME,BRANCH NAME,PLACEMENT COMPANY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,35,GAIL,KANPUR,2401035,MAHENDRA KUMARANURAGI,MALE,SC,2022mahendrak786@gmail.com,6388020069,9793454106,...,19,11,39,,4123 PREP,Q,,,,
103,99,GAIL,KANPUR,2401100,ZAFRUL HUSAIN ANSARI,MALE,OBC,zafrulhusainansari@gmail.com,8475942076,7351866782,...,41,12,82,,,N,,,,
104,6,GAIL,KANPUR,2401006,AMAN KUMAR,MALE,SC,amank651169@gmail.com,6306330518,6306330518,...,24,3,36,,PREP -5168,Q,,,,
105,,,,,,,,,,,...,,,0,,,,,,,


**Ques:** What is a pickle file?<br>
**Ans:** A pickle file stores a Python object that has been serialized with Python's pickle module. Pickle is primarily used for serializing and deserializing a Python object structure. In other words, it's the process of converting a Python object into a byte stream to store it in a file/database, maintain program state across sessions, or transport data over the network.

In simple words:<br>
*    Pickle is a module in Python that helps you convert Python objects (like lists, dictionaries, or custom objects) into a format that can be saved, sent over a network, or stored in a database. This process is called serialization.<br>
*    Serialization means turning a Python object (a structured object that lives in the program's memory) into a byte stream (a sequence of bytes that can be written to a file or sent over a network).<br>
*    After you've saved or sent the byte stream, you can deserialize it. Deserialization is the process of converting the byte stream back into the original Python object, so you can work with it just like before.

In [None]:
df_excel.to_pickle('excel') # serialization: writes the DataFrame to a pickle file named 'excel' (no extension)

In [None]:
# Deserialization: rebuild the DataFrame from the 'excel' pickle file.
# Only unpickle files from a source you trust — loading a pickle can execute arbitrary code.
pd.read_pickle('excel')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,"GAIL UTKARSH SUPER 100, KANPUR (ENGINEERING)",,,,,,,,,,...,,,,,,,,,,
1,SESSION -2023-2024,,,,,,,,,,...,,,,,,,,,,
2,PROJECT MANAGER NAME: MR. DEVENDRA KUMAR SHARMA,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,FINAL ADMISSION,,,
4,SL NO,SPONSOR,CENTRE,ROLL NO.,STUDENTS NAME,GENDER,CAT,Email-Id,Mobile No. 1,Mobile No. 2,...,CHEMISTRY,MATHS,TOTAL,AIR,CAT RANK,Q/NQ,COLLEGE TYPE (IIT/NIT/IIIT/OTHER GOVT ENG COLLEG),COLLEGE NAME,BRANCH NAME,PLACEMENT COMPANY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,35,GAIL,KANPUR,2401035,MAHENDRA KUMARANURAGI,MALE,SC,2022mahendrak786@gmail.com,6388020069,9793454106,...,19,11,39,,4123 PREP,Q,,,,
103,99,GAIL,KANPUR,2401100,ZAFRUL HUSAIN ANSARI,MALE,OBC,zafrulhusainansari@gmail.com,8475942076,7351866782,...,41,12,82,,,N,,,,
104,6,GAIL,KANPUR,2401006,AMAN KUMAR,MALE,SC,amank651169@gmail.com,6306330518,6306330518,...,24,3,36,,PREP -5168,Q,,,,
105,,,,,,,,,,,...,,,0,,,,,,,
