##  PythonClass 09 -  Reading Different Formats in Pandas --- part 3

In [1]:
from io import StringIO, BytesIO
import pandas as pd
import numpy as np

In [2]:
data = ('col1,col2,col3\n'
            'a,b,7\n'
            'c,d,8\n'
            'e,f,6')

'col1,col2,col3\na,b,7\nc,d,8\ne,f,6'

In [4]:
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,a,b,7
1,c,d,8
2,e,f,6


In [5]:
## Read from specific columns
df=pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ['COL1', 'COL3'])

In [6]:
df.head()

Unnamed: 0,col1,col3
0,a,7
1,c,8
2,e,6


In [18]:
## Save data into csv format
df.to_csv('Test.csv')

In [7]:
## Specifying columns data types

df=pd.read_csv(StringIO(data),dtype={'b':int,'c':np.float,'a':'Int64'})

In [8]:
df

Unnamed: 0,col1,col2,col3
0,a,b,7
1,c,d,8
2,e,f,6


In [9]:
## check the datatype
df.dtypes

col1    object
col2    object
col3     int64
dtype: object

In [None]:

## Index columns and training delimiters

In [10]:
data = ('Stu_id,Stu_name,CourseName,Cgpa\n'
           '1,Hassan,DataScience,3.7\n'
            '8,Babar,ComputerScience,3.0\n'
            '9,KamranAkmal,Electrical.2.8')

In [11]:
pd.read_csv(StringIO(data),index_col=0)

Unnamed: 0_level_0,Stu_name,CourseName,Cgpa
Stu_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Hassan,DataScience,3.7
8,Babar,ComputerScience,3.0
9,KamranAkmal,Electrical.2.8,


In [12]:
#Now, What if we donont id
data_noCol = ('a,b,c\n'
           '4,apple,bat,\n'
            '8,orange,cow,')

In [13]:
pd.read_csv(StringIO(data_noCol))

Unnamed: 0,a,b,c
4,apple,bat,
8,orange,cow,


In [14]:
pd.read_csv(StringIO(data_noCol),index_col=False)

Unnamed: 0,a,b,c
0,4,apple,bat
1,8,orange,cow


In [15]:

## Combining usecols and index_col
data = ('Stu_id,Stu_name,CourseName,Cgpa\n'
           '1,Hassan,DataScience,3.7\n'
            '8,Babar,ComputerScience,3.0\n'
            '9,KamranAkmal,Electrical.2.8')

In [16]:

pd.read_csv(StringIO(data), usecols=['Stu_name', 'CourseName'],index_col=False)

Unnamed: 0,Stu_name,CourseName
0,Hassan,DataScience
1,Babar,ComputerScience
2,KamranAkmal,Electrical.2.8


In [21]:
#Reading CSV File:::

df1 = pd.read_csv('test.csv',',')

In [22]:
df1

Unnamed: 0.1,Unnamed: 0,col1,col2,col3
0,0,a,b,7
1,1,c,d,8
2,2,e,f,6


In [30]:
#Reading CSV File with ; :::
df1 = pd.read_csv('test2.csv',';',index_col = False)

In [32]:
df1.drop('Unnamed: 0',axis =1)

Unnamed: 0,col1,col2,col3
0,x,1,
1,a,2,
2,c,3,


### Reading HTML Content

In [None]:
## URL to CSV

df=pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
                 sep='\t')

In [None]:
df.head()

In [33]:
url = 'https://www.fdic.gov/bank/individual/failed/banklist.html'

dfs = pd.read_html(url)

In [34]:
dfs[0]

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date
0,The First State Bank,Barboursville,WV,14361,"MVB Bank, Inc.","April 3, 2020"
1,Ericson State Bank,Ericson,NE,18265,Farmers and Merchants Bank,"February 14, 2020"
2,City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,"November 1, 2019"
3,Resolute Bank,Maumee,OH,58317,Buckeye State Bank,"October 25, 2019"
4,Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,"October 25, 2019"
...,...,...,...,...,...,...
556,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001"
557,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001"
558,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001"
559,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000"


In [36]:
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None)

In [37]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [None]:

## Read Json to CSV

In [42]:

Data = '{"employee_name": "James", "email": "james@gmail.com", "job_profile": [{"title1":"Team Lead", "title2":"Sr. Developer"}]}'
pd.read_json(Data)

Unnamed: 0,email,employee_name,job_profile
0,james@gmail.com,James,"{'title2': 'Sr. Developer', 'title1': 'Team Le..."


In [43]:
# convert Json to csv

In [44]:
Data.to_csv('Data.csv')

AttributeError: 'str' object has no attribute 'to_csv'

In [45]:
# convert Json to different json formats

Data.to_json(orient="index")

AttributeError: 'str' object has no attribute 'to_json'

### Reading Excel Files

In [46]:

df_excel=pd.read_excel('FlightPredictionDataset/data_train.xlsx')

In [47]:
df_excel.head()

Unnamed: 0,Airline,Date_of_Journey,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Price
0,IndiGo,24/03/2019,Banglore,New Delhi,BLR → DEL,22:20,01:10 22 Mar,2h 50m,non-stop,No info,3897
1,Air India,1/05/2019,Kolkata,Banglore,CCU → IXR → BBI → BLR,05:50,13:15,7h 25m,2 stops,No info,7662
2,Jet Airways,9/06/2019,Delhi,Cochin,DEL → LKO → BOM → COK,09:25,04:25 10 Jun,19h,2 stops,No info,13882
3,IndiGo,12/05/2019,Kolkata,Banglore,CCU → NAG → BLR,18:05,23:30,5h 25m,1 stop,No info,6218
4,IndiGo,01/03/2019,Banglore,New Delhi,BLR → NAG → DEL,16:50,21:35,4h 45m,1 stop,No info,13302


### Pickling
All pandas objects are equipped with to_pickle methods which use Python’s cPickle module to save data structures to disk using the pickle format.

In [None]:
df_excel.to_pickle('df_excel')

In [None]:

df=pd.read_pickle('df_excel')