## Differences between CSV and XLS file formats:
- CSV format is a plain text format in which values are seperated by commas (Comma Seperated Values).
- XLS file format is an Excel Sheets binary file format which holds information about all the worksheets in a file, including both content and formatting.

In [1]:
import pandas as pd

## Writing a CSV File

In [2]:
d1 = {'a':[1,2,3,4,5], 'b':[6,7,8,9,10], 'c':[11,12,13,14,15]}
var1 = pd.DataFrame(d1,index=['i','ii','iii','iv','v'])
print(var1)

     a   b   c
i    1   6  11
ii   2   7  12
iii  3   8  13
iv   4   9  14
v    5  10  15


In [3]:
# converting to CSV File
var1.to_csv("text1.csv",index=False)

In [4]:
# changing header of file
var1.to_csv('text2.csv',index=False, header=['x','y','z'])

## Reading a CSV File

In [5]:
csv1 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv')
csv1

Unnamed: 0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
0,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
1,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H08,Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41710,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41712,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41713,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [6]:
# for printing only 1 row
csv2 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv', nrows=1)
csv2

Unnamed: 0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
0,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...


In [7]:
# printing type of csv1 and csv2
print(type(csv1))
print(type(csv2))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [8]:
# for printing more than 5 rows
csv3 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',nrows=5)
csv3

Unnamed: 0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
0,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
1,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H08,Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...


In [9]:
# choosing specific columns
csv4 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',nrows=5, usecols=['Variable_code','Value',])
csv4

Unnamed: 0,Variable_code,Value
0,H01,757504
1,H04,674890
2,H05,49593
3,H07,33020
4,H08,654404


In [10]:
# choosing specific columns by its index numbers
csv5 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',nrows=5, usecols=[0,2,4])
csv5

Unnamed: 0,Year,Industry_code_NZSIOC,Units
0,2021,99999,Dollars (millions)
1,2021,99999,Dollars (millions)
2,2021,99999,Dollars (millions)
3,2021,99999,Dollars (millions)
4,2021,99999,Dollars (millions)


In [16]:
# skipping a specific row 

print('Original Data')
csv1 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv')
csv1

Original Data


Unnamed: 0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
0,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
1,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H08,Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41710,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41712,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41713,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [17]:
# skipping any row
csv5 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',skiprows=[0])
csv5

Unnamed: 0,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,"757,504","ANZSIC06 divisions A-S (excluding classes K6330, L6711, O7552, O760, O771, O772, S9540, S9601, S9602, and S9603)"
0,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
1,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H08,Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H09,Interest and donations,Financial performance,26138,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41709,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41710,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41712,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [18]:
# using any column as index row
csv5 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',index_col='Variable_code')
csv5

Unnamed: 0_level_0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
Variable_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
H01,2021,Level 1,99999,All industries,Dollars (millions),Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
H04,2021,Level 1,99999,All industries,Dollars (millions),"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
H05,2021,Level 1,99999,All industries,Dollars (millions),"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
H07,2021,Level 1,99999,All industries,Dollars (millions),Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
H08,2021,Level 1,99999,All industries,Dollars (millions),Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...
H37,2013,Level 3,ZZ11,Food product manufacturing,Percentage,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
H38,2013,Level 3,ZZ11,Food product manufacturing,Percentage,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
H39,2013,Level 3,ZZ11,Food product manufacturing,Percentage,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
H40,2013,Level 3,ZZ11,Food product manufacturing,Percentage,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [19]:
# changing any row to heading
csv8 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',header=2)
csv8


Unnamed: 0,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,"674,890","ANZSIC06 divisions A-S (excluding classes K6330, L6711, O7552, O760, O771, O772, S9540, S9601, S9602, and S9603)"
0,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
1,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H08,Total expenditure,Financial performance,654404,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H09,Interest and donations,Financial performance,26138,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H10,Indirect taxes,Financial performance,6991,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41708,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41709,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41710,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [20]:
# changing heading row to any other names
csv9 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',names=['col1','col2','col3','col4','col5','col6','col7','col8','col9','col10'])
csv9

Unnamed: 0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10
0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
1,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41712,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41713,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41714,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [21]:
csv10 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',names=['col1','col2','col3','col4','col5','col6','col7','col8',])
csv10

Unnamed: 0,Unnamed: 1,col1,col2,col3,col4,col5,col6,col7,col8
Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...
2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [23]:
# when heading is deleted then giving a new row of columns
csv11 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',header=None, prefix='col')
csv11

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9
0,Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
1,2021,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,757504,ANZSIC06 divisions A-S (excluding classes K633...
2,2021,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,674890,ANZSIC06 divisions A-S (excluding classes K633...
3,2021,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,49593,ANZSIC06 divisions A-S (excluding classes K633...
4,2021,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,33020,ANZSIC06 divisions A-S (excluding classes K633...
...,...,...,...,...,...,...,...,...,...,...
41711,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H37,Quick ratio,Financial ratios,52,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41712,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H38,Margin on sales of goods for resale,Financial ratios,40,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41713,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H39,Return on equity,Financial ratios,12,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."
41714,2013,Level 3,ZZ11,Food product manufacturing,Percentage,H40,Return on total assets,Financial ratios,5,"ANZSIC06 groups C111, C112, C113, C114, C115, ..."


In [26]:
# converting any column data from integer to float
csv12 = pd.read_csv('annual-enterprise-survey-2021-financial-year-provisional-csv.csv',dtype={'Value':'float'})
csv12

ValueError: could not convert string to float: '757,504'