## <font color="purple"><h4 align="center">Read/Write CSV and Excel Files in Pandas</font>

### <font color="blue">Read CSV</color>

In [1110]:
import pandas as pd

df = pd.read_csv("stock_data.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [1111]:
df = pd.read_csv("stock_data.csv", skiprows=1)
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [1112]:
df = pd.read_csv("stock_data.csv", header=1) # skiprows and header are kind of same
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [1113]:
df = pd.read_csv("stock_data.csv", header=None, names = ["ticker","eps","revenue","people"])
df

Unnamed: 0,ticker,eps,revenue,people
tickers,eps,revenue,price,people
GOOGL,27.82,87,845,larry page
WMT,4.61,484,65,
MSFT,-1,85,64,bill gates
RIL,not available,50,1023,mukesh ambani
TATA,5.6,-1,n.a.,ratan tata


In [1114]:
df = pd.read_csv("stock_data.csv",  nrows=2)
df

# nrows=2: This parameter specifies that only the first 2 rows of the CSV file should be read. 
# This is useful for quickly inspecting the data without loading the entire file, especially if it's large.

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,


In [1115]:
df = pd.read_csv("stock_data.csv", na_values=["n.a.", "not available"])
df
# na_values=["n.a.", "not available"]: This parameter allows you to specify custom strings 
# that should be treated as missing values (NaN) when reading the file.
# In this case, any occurrences of "n.a." or "not available" in 
# the CSV will be converted to NaN (Not a Number) in the DataFrame.

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


In [1116]:
df = pd.read_csv("stock_data.csv",  na_values={
        'eps': ['not available'],
        'revenue': [-1],
        'people': ['not available','n.a.']
    })
df
# 'eps': ['not available']: In the eps column, any occurrence of the string "not available" will be treated as a NaN.
# 'revenue': [-1]: In the revenue column, any occurrence of -1 will be treated as a NaN. 
# This is often used to indicate missing or invalid data.
# 'people': ['not available', 'n.a.']: In the people column, both "not available" and "n.a." will be treated as NaN.

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,
2,MSFT,-1.0,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


### <font color="blue">Write to CSV From Another CSV</color>

In [1117]:
# df.to_csv("new.csv", index=False)#rows=False

# stock_data.csv is stored in df

# index=False: This parameter specifies that the row names (index) 
# should not be written to the CSV file. 
# Removing Index (index=False): Default behavior keeps headers; only the data is saved without the index.
# Removing Header (header=False): Default behavior keeps the index; only the data rows are saved without the column names.

In [1118]:
# df.columns
# Typed to find out name of each column

In [1119]:
df.to_csv("new.csv",header=False)#ColumnHeading=False

# Comment  df.to_csv("new.csv", columns=["eps","price"], index=False) and see
# Headers refer to the names of the columns in a DataFrame. They are usually located at the top of the DataFrame.

# df.to_csv: This method is used to write the DataFrame df to a CSV file.
# "new.csv": This is the name of the output CSV file. 
# If a file with this name already exists, it will be overwritten.
# header=False: This parameter specifies that the column names (header) 
# should not be written to the CSV file. 
# As a result, only the data rows will be saved without the header row

In [1120]:
df.to_csv("new.csv", columns=["tickers","price"], index=True,  header=True)

# Again, this method writes the DataFrame df to a CSV file.
# "new.csv": The name of the output CSV file, which will be overwritten if it exists.
# columns=["tickers", "price"]: This parameter specifies that only the columns
# named "tickers" and "price" should be included in the output CSV.
# If these columns exist in the DataFrame, only their data will be saved.
# index=False: This parameter indicates that the index of the row labels should not be written to the CSV file.
# Only the data in the specified columns will be saved.

### <font color="blue">Read Excel</color>

In [1121]:
df = pd.read_excel("stock_dataCompany.xlsx")
df

Unnamed: 0.1,Unnamed: 0,tickers,price,pe,eps
0,0,GOOGL,845,30.37,27.82
1,1,WMT,65,14.26,4.61
2,2,MSFT,64,30.97,2.12


In [1122]:
def convert_people_cell(cell):
    if cell=="n.a.":
        return 'Sam Walton'
    return cell

# convert_people_cell(cell):

# This function takes a single argument, cell.
# If the value of cell is "n.a.", it returns the string 'Sam Walton'.
# Otherwise, it simply returns the original cell value.

def convert_price_cell(cell):
    if cell=="n.a.":
        return 50
    return cell

# convert_price_cell(cell):

# Similar to the first function, this one also takes a cell as an argument.
# If cell equals "n.a.", it returns the integer 50.
# For any other value, it returns the original cell value.
    
df = pd.read_excel("ConvertExcelData.xlsx", converters= {
        'people': convert_people_cell,
        'price': convert_price_cell
    })
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,Sam Walton
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,50,ratan tata


### <font color="blue">Write to Excel</color>

In [1123]:
df.to_excel("new.xlsx", sheet_name="stocks", index=False, startrow=2, startcol=1)   

**Write two dataframes to two separate sheets in excel**

In [1124]:
df_stocks = pd.DataFrame({
    'tickers': ['GOOGL', 'WMT', 'MSFT'],
    'price': [845, 65, 64 ],
    'pe': [30.37, 14.26, 30.97],
    'eps': [27.82, 4.61, 2.12]
})

df_weather =  pd.DataFrame({
    'day': ['1/1/2017','1/2/2017','1/3/2017'],
    'temperature': [32,35,28],
    'event': ['Rain', 'Sunny', 'Snow']
})

In [1125]:
with pd.ExcelWriter('stocks_weather.xlsx') as writer:
    df_weather.to_excel(writer, sheet_name="weather")
    
# When we  run this code, it will create an Excel file named 'stocks_weather.xlsx' 
# we can only write one df_stocks or df_weather at one time in one excel file

In [1126]:
with pd.ExcelWriter('stock_dataCompany.xlsx') as writer:
    df_stocks.to_excel(writer, sheet_name="stocks")