# Reading Data From Various Data Sources Using Pandas


In [1]:
import pandas as pd
from io import StringIO

# Raw JSON document for a single employee; "job_profile" is a one-element
# list that wraps a nested object.
data = """
{ 
	"employee_name" : "James" , 
	"email" : "james@gmail.com" , 
	"job_profile" : [ { "title1":"Team Lead" , "title2" : "Sr. Developer"} ] 
}
"""

# Expose the literal as a file-like object and let pandas parse it into a frame.
json_stream = StringIO(data)
df = pd.read_json(json_stream)

In [2]:
# Display the parsed frame: one row, with the nested "job_profile" object held in a single column.
df

Unnamed: 0,employee_name,email,job_profile
0,James,james@gmail.com,"{'title1': 'Team Lead', 'title2': 'Sr. Develop..."


In [3]:
# orient="index": the JSON object is keyed by row label, and each row maps column name -> value.
df.to_json(orient="index")

'{"0":{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [4]:
# orient="records": a JSON array with one object per row; row labels are not included.
df.to_json(orient="records")

'[{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}]'

# CSV From URL to DataFrame

In [10]:
# Wine dataset hosted on the UCI archive.
wine_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"

# header=None: do not treat the first line as column names; pandas numbers the columns instead.
dataFrame = pd.read_csv(wine_url, header=None)

In [11]:
# Preview the first five rows of the wine data.
dataFrame.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


# DataFrame to New CSV File

In [15]:
# Write the frame to "Wine.csv" (relative path, so it lands in the current working directory).
dataFrame.to_csv(path_or_buf="Wine.csv")

# HTML to DataFrame

In [None]:
! pip install lxml

In [25]:
# read_html parses the page's <table> elements into a list of DataFrames.
# match="Country" keeps only the tables whose text matches that string/regex;
# [0] then selects the first of the matching tables.
url = "https://en.wikipedia.org/wiki/List_of_country_calling_codes"
pd.read_html(url, match="Country")[0]

Unnamed: 0,Base,Calling code,Country,Note
0,Almirante Brown Antarctic Base,54,Argentina,
1,Amundsen–Scott South Pole Station,1,United States,
2,Artigas Base,598,Uruguay,
3,Asuka Station,81,Japan,
4,Base Presidente Eduardo Frei Montalva and Vill...,56,Chile,
...,...,...,...,...
60,Troll Station,47,Norway,
61,Vernadsky Research Base,380,Ukraine,
62,Vostok Station,7,Russia,
63,Wasa Research Station,46,Sweden,


# Excel File(.xlsx) to DataFrame

In [None]:
!pip install openpyxl

In [30]:
# Load the first worksheet (sheet_name=0) of the workbook into a DataFrame, then display it.
df_excel = pd.read_excel("data.xlsx", sheet_name=0)
df_excel

Unnamed: 0,Name,Age
0,Krish,32
1,Jack,34
2,John,31


# Pickle File [V.IMP]
- #### Pickle in Python is used for serializing and deserializing a Python object structure.
- #### Pickling is the process of converting a Python object into a byte stream so that it can be stored in a file or database, used to maintain program state across sessions, or transported over a network.

In [34]:
# Serialize the Excel-derived frame to a pickle file on disk.
df_excel.to_pickle(path="dataFrameExcelFile")

In [35]:
# Reading Pickle File: deserialize the frame back from disk.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files from a trusted source.
pd.read_pickle("dataFrameExcelFile")

Unnamed: 0,Name,Age
0,Krish,32
1,Jack,34
2,John,31
