# Importing and Exporting Data

## Importing Datasets from CSV files

In [1]:
import pandas as pd
# Load the auto-mpg dataset from its hosted CSV file, then print a summary
# of the resulting DataFrame (index range, column dtypes, non-null counts).
auto_mpg_url = 'https://assets.datacamp.com/production/repositories/497/datasets/3ed265bae13db503890f98663c33ac16a041e7a3/auto-mpg.csv'
df = pd.read_csv(auto_mpg_url)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null int64
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null object
name      392 non-null object
dtypes: float64(3), int64(4), object(2)
memory usage: 27.6+ KB


In [2]:
# Use iloc to view a slice of the data: rows at positions 150-159, every column
df.iloc[150:160]

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
150,19.0,6,225.0,95,3264,16.0,75,US,plymouth valiant custom
151,18.0,6,250.0,105,3459,16.0,75,US,chevrolet nova
152,15.0,6,250.0,72,3432,21.0,75,US,mercury monarch
153,15.0,6,250.0,72,3158,19.5,75,US,ford maverick
154,16.0,8,400.0,170,4668,11.5,75,US,pontiac catalina
155,15.0,8,350.0,145,4440,14.0,75,US,chevrolet bel air
156,16.0,8,318.0,150,4498,14.5,75,US,plymouth grand fury
157,14.0,8,351.0,148,4657,13.5,75,US,ford ltd
158,17.0,6,231.0,110,3907,21.0,75,US,buick century
159,16.0,6,250.0,105,3897,18.5,75,US,chevroelt chevelle malibu


```python
# Using header keyword
sunspots = pd.read_csv(filepath, header=None)

# Using names keyword
col_names = ['year', 'month', 'day']
sunspots = pd.read_csv(filepath, header=None, names=col_names)

# Using na_values keyword (1)
sunspots = pd.read_csv(filepath, header=None, names=col_names, na_values='-1')

# Using na_values keyword (2) - Includes a blank space before -1
# na values == NaN
sunspots = pd.read_csv(filepath, header=None, names=col_names, na_values=' -1')

# Using na_values keyword (3)
sunspots = pd.read_csv(filepath, header=None, names=col_names, na_values={'sunspots':[' -1']})

# Using parse_dates keyword
sunspots = pd.read_csv(filepath, header=None, names=col_names, na_values={'sunspots':[' -1']}, parse_dates=[[0,1,2]])

# Using dates as index
sunspots.index = sunspots['year_month_day']
sunspots.index.name = 'date'
# Trimming redundant columns - Place interested columns into a list
cols = ['sunspots', 'definite']
sunpots = sunspots[cols]
sunspots.iloc[10:20, :]
```

## Exporting / Writing files

```python
# CSV
out_csv = 'sunspots.csv'
sunspots.to_csv(out_csv)

#TSV
out_tsv = 'sunspots.tsv'
sunpots.to_csv(out_tsv, sep='\t')

#Excel
out_xlsx = 'sunspots.xlsx'
sunspots.to_excel(out_xlsx)
```

## Delimiters, Headers, and Extensions

In [3]:
# Location of the messy stock data file
filename = 'https://assets.datacamp.com/production/repositories/497/datasets/4e8cdfbf9e125bb723981f9218bee16194c7d869/messy_stock_data.tsv'

# Read it with read_csv's default settings and show the first five rows
# to see how the raw file gets parsed
df = pd.read_csv(filename)
df.head(5)

Unnamed: 0,The following stock data was collect on 2016-AUG-25 from an unknown source
These kind of ocmments are not very useful,are they?
probably should just throw this line away too,but not the next since those are column labels
name Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec,
# So that line you just read has all the column headers labels,
IBM 156.08 160.01 159.81 165.22 172.25 167.15 164.75 152.77 145.36 146.11 137.21 137.96,


In [4]:
# Read in the file again, this time with parameters that match its layout:
#   sep=' '      - fields are separated by single spaces
#   header=3     - take the column labels from row 3 (0-based)
#   comment='#'  - do not parse anything that follows a '#'
df = pd.read_csv(filename, sep=' ', header=3, comment='#')
df.head(5)

Unnamed: 0,name,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,IBM,156.08,160.01,159.81,165.22,172.25,167.15,164.75,152.77,145.36,146.11,137.21,137.96
1,MSFT,45.51,43.08,42.13,43.47,47.53,45.96,45.61,45.51,43.56,48.7,53.88,55.4
2,GOOGLE,512.42,537.99,559.72,540.5,535.24,532.92,590.09,636.84,617.93,663.59,735.39,755.35
3,APPLE,110.64,125.43,125.97,127.29,128.76,127.81,125.34,113.39,112.8,113.36,118.16,111.73
