## Reading & Writing CSV Files

In [2]:
import pandas as pd
df= pd.read_csv("stock_data.csv")
df

Unnamed: 0,Stocks Data,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


### Since the csv file has header so other col values read as unnamed
### To avoid this write arg header=1 or skiprows=1

In [4]:
df= pd.read_csv("stock_data.csv" , skiprows=1)
df
# now reading from 2nd line

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


### If our CSV file has no col headers


In [7]:
df= pd.read_csv("stock_data.csv",header=None)
df
#Write header=None to generate 0 based colname else will take first row as colheader


Unnamed: 0,0,1,2,3,4
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [9]:
df= pd.read_csv("stock_data.csv",header=None,names=['ticker',"eps","revenue","price","people"])
df

Unnamed: 0,ticker,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [11]:
# To read few rows of csv file
df= pd.read_csv("stock_data.csv" ,nrows=2)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.


In [15]:
df= pd.read_csv("stock_data.csv", na_values=["not available","n.a.",-1])
df
# Some values are not available changing to NaN

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


In [18]:
# Revenue can't be -1 but eps can be
#So supply in dictionary not list
df= pd.read_csv("stock_data.csv", na_values={
    'eps' : ["not available", "n.a."],
    'revenue' :["not available", "n.a.", -1],
    'people' : ["not available", "n.a."]
})
df
# So eps retained -1 but revenue's -1 changed to NaN

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,
2,MSFT,-1.0,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


In [22]:
df.to_csv('new.csv' ,index=False)
# This will create a new csv file in same dir havind df data
# index= False to remove the 0 based index

In [23]:
# To skip few col in csv files
df.columns

Index(['tickers', 'eps', 'revenue', 'price', 'people'], dtype='object')

In [27]:
df.to_csv("new.csv",columns=['tickers','people'])
# header=False for no colHeader