# CSV

### Imports

In [287]:
import csv
import pandas

### Read a csv file

In [288]:
with open('csv_files/lorem_ipsum.txt') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        else:
            print(f'\t{row[0]} is a Latin word {row[1]} is really important, '
                  f'and means a lot {row[2]}.{row[3]} was generated. '
                  f'{row[4]}, I really have no idea what it means')
            line_count += 1
    print(f'Processed {line_count} lines.')


Column names are First, Second, Third, Fourth, Fifth
	Lorem is a Latin word ipsum is really important, and means a lot dolorsit amet.consectetur adipiscing was generated. elit, I really have no idea what it means
	Phasellus is a Latin word vitae is really important, and means a lot tempor.lectus was generated.  sdawcy, I really have no idea what it means
	Nunc eleifend is a Latin word consectetur is really important, and means a lot erat a.tristique was generated.  tristique, I really have no idea what it means
	Etiam is a Latin word blandit venenatis is really important, and means a lot dolor.sed maximus was generated. purus egestas, I really have no idea what it means
	Vestibulum is a Latin word erat lectus is really important, and means a lot viverra nec.sodales eget was generated. pretium nec orci, I really have no idea what it means
	Ut maximus is a Latin word massa risus is really important, and means a lot non rhoncus.ipsum was generated. sollicitudin eget, I really have no idea

### Reading CSV files into a dictionary

In [289]:
with open('csv_files/lorem_ipsum.txt') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        else:
            print(f'\t{row["First"]} is a Latin word {row["Second"]} is really important, '
                  f'and means a lot {row["Third"]}.{row["Fourth"]} was generated. '
                  f'{row["Fifth"]}, I really have no idea what it means')
            line_count += 1
    print(f'Processed {line_count} lines.')


Column names are First, Second, Third, Fourth, Fifth
	Phasellus is a Latin word vitae is really important, and means a lot tempor.lectus was generated.  sdawcy, I really have no idea what it means
	Nunc eleifend is a Latin word consectetur is really important, and means a lot erat a.tristique was generated.  tristique, I really have no idea what it means
	Etiam is a Latin word blandit venenatis is really important, and means a lot dolor.sed maximus was generated. purus egestas, I really have no idea what it means
	Vestibulum is a Latin word erat lectus is really important, and means a lot viverra nec.sodales eget was generated. pretium nec orci, I really have no idea what it means
	Ut maximus is a Latin word massa risus is really important, and means a lot non rhoncus.ipsum was generated. sollicitudin eget, I really have no idea what it means
	Aenean is a Latin word et is really important, and means a lot dictum.est was generated. massa, I really have no idea what it means
	Ut is a Lat

### Writing a CSV file

In [290]:
with open('csv_files/public_opinion.csv', mode='w') as public_opinion_file:
    public_opinion_writer = csv.writer(public_opinion_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    public_opinion_writer.writerow(['Person', 'Job', 'Month'])
    public_opinion_writer.writerow(['Max Musterman', 'Consultant', 'January'])

### Writing a CSV file from a Dictionary

In [291]:
with open('csv_files/public_opinion2.csv', mode='w') as csv_file:
    fieldnames = ['person_name', 'job', 'employment_month']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerow({'person_name': 'Max Musterman', 'job': 'Consultant', 'employment_month': 'January'})
    writer.writerow({'person_name': 'Pawel Kowalski', 'job': 'Nurse', 'employment_month': 'February'})


### Parsing complicated CSV with pandas

#### Print only first 10 rows

In [292]:
df = pandas.read_csv("csv_files/random_csv_data.csv")
df.head(10)

Unnamed: 0,Full name,Age,Birthday,Email,City,Salary
0,Violet Morris,50,8/13/1996,lit@jelavef.ad,Ojotasjo,$4196.55
1,Jeffery Horton,44,6/22/1982,didehidog@wiep.gm,Builaoki,$8401.49
2,Adrian Erickson,41,1/31/1962,malmob@jam.fm,Danada,$9804.96
3,Sarah Hubbard,50,3/22/1998,arega@ma.uy,Kiwraciw,$7.35
4,Lida Moran,27,2/7/1965,nafriwnah@dacaz.lc,Nuosijo,$3736.35
5,Gregory Ellis,28,1/18/1969,ujkeduku@gaf.do,Niljujhet,$8391.73
6,Jonathan Hopkins,65,12/21/1989,gafhab@pinofsis.ph,Sahtufat,$959.05
7,Leroy Weber,30,3/14/1990,vevuj@tej.be,Jebmudum,$2411.78
8,Carl Clarke,57,4/22/1967,wopevbic@dul.gn,Bigovejo,$8802.84
9,Nannie Simmons,40,1/15/1982,ebamernow@ewo.bn,Nukiof,$3121.11


### Data types

Birthday and Salary should not be of type str!
"Object" type is a DataFrame str format.

In [293]:
print(df.dtypes)

Full name    object
Age           int64
Birthday     object
Email        object
City         object
Salary       object
dtype: object


### Changing the dataframe index-column

In [294]:
df = pandas.read_csv("csv_files/random_csv_data.csv",index_col="Full name")
df.head(10)

Unnamed: 0_level_0,Age,Birthday,Email,City,Salary
Full name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Violet Morris,50,8/13/1996,lit@jelavef.ad,Ojotasjo,$4196.55
Jeffery Horton,44,6/22/1982,didehidog@wiep.gm,Builaoki,$8401.49
Adrian Erickson,41,1/31/1962,malmob@jam.fm,Danada,$9804.96
Sarah Hubbard,50,3/22/1998,arega@ma.uy,Kiwraciw,$7.35
Lida Moran,27,2/7/1965,nafriwnah@dacaz.lc,Nuosijo,$3736.35
Gregory Ellis,28,1/18/1969,ujkeduku@gaf.do,Niljujhet,$8391.73
Jonathan Hopkins,65,12/21/1989,gafhab@pinofsis.ph,Sahtufat,$959.05
Leroy Weber,30,3/14/1990,vevuj@tej.be,Jebmudum,$2411.78
Carl Clarke,57,4/22/1967,wopevbic@dul.gn,Bigovejo,$8802.84
Nannie Simmons,40,1/15/1982,ebamernow@ewo.bn,Nukiof,$3121.11


### Fixing the data types
#### We force pandas to parse date with parse_date

In [295]:
df = pandas.read_csv("csv_files/random_csv_data.csv",index_col="Full name", parse_dates=['Birthday'])
df.head(10)
print(type(df['Birthday'][0]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


### We can set column names as well!

In [296]:
df = pandas.read_csv('csv_files/random_csv_data.csv',
            index_col='Full name',
            parse_dates=['Birthday_changes'],
            header=0,
            names=['Full name', 'Age_changed','Birthday_changes', 'Email_changed','City_changed','Salary_changed'])
df.head(10)
print(type(df['Birthday_changes'][0]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


### Writing CSV with pandas
#### We save the previously modified df

In [297]:
df.to_csv('csv_files/random_csv_data_modified.csv')