# Chapter 33: Exporting Data

In [1]:
import pandas as pd
# Tab-separated river data hosted on GitHub.
url = ('https://github.com/mattharrison/datasets/raw/master'
       '/data/dirtydevil.txt')
# Rows 0-33 and row 35 are skipped, so row 34 is the first line pandas parses.
df = pd.read_csv(
    url,
    sep='\t',
    skiprows=lambda row: row < 34 or row == 35,
)

  df = pd.read_csv(url, skiprows=lambda num: num <34 or num == 35,


In [2]:
def to_denver_time(df_, time_col, tz_col):
    """Return ``time_col`` as timezone-aware America/Denver datetimes.

    Each row's time is interpreted in the zone named by that row's
    ``tz_col`` value (with ``'MDT'`` mapped to ``'MST7MDT'``) and then
    converted to ``'America/Denver'``.

    Parameters
    ----------
    df_ : DataFrame holding both columns.
    time_col : name of the column with the times to convert.
    tz_col : name of the column with the time zone label of each row.

    Returns
    -------
    Series aligned with ``df_``'s index.
    """
    def localize_group(times):
        # Within ``transform`` the group's ``.name`` is the group key,
        # which here is the time zone label shared by the group.
        parsed = pd.to_datetime(times)
        localized = parsed.dt.tz_localize(times.name, ambiguous=True)
        return localized.dt.tz_convert('America/Denver')

    zone_labels = df_[tz_col].replace('MDT', 'MST7MDT')
    relabeled = df_.assign(**{tz_col: zone_labels})
    return relabeled.groupby(tz_col)[time_col].transform(localize_group)

In [3]:
def tweak_river(df_):
    """Return a tidied version of the raw frame.

    The ``datetime`` column is replaced by the result of
    ``to_denver_time`` (using ``tz_cd`` as the zone label column), the
    two measurement columns get readable names, and ``datetime`` becomes
    the index.
    """
    readable_names = {'144166_00060': 'cfs',
                      '144167_00065': 'gage_height'}
    denver_times = to_denver_time(df_, 'datetime', 'tz_cd')
    with_times = df_.assign(datetime=denver_times)
    return with_times.rename(columns=readable_names).set_index('datetime')

In [4]:
# Run the cleanup function on the raw frame and display the result
dd = df.pipe(tweak_river)
dd

Unnamed: 0_level_0,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
...,...,...,...,...,...,...,...
2020-09-28 08:30:00-06:00,USGS,9333500,MDT,9.53,P,6.16,P
2020-09-28 08:45:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:00:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:15:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P


## 33.3 Creating CSV Files

- To write our data to a file, we can use the ``.to_csv`` method.
- Pandas writes the index values to the CSV, but when reading a CSV back it creates a new index unless we specify which column to use as the index.

In [7]:
# Write the frame, index values included, to a CSV file
dd.to_csv(path_or_buf='tmp/dd.csv')

In [8]:
# With no path given, .to_csv returns the CSV text; preview the first five rows
print(dd.head().to_csv())

datetime,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.0,A:[91],,
2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.0,A:[91],,



In [9]:
# Stick the saved datetime column in the index instead of getting a new index
dd2 = pd.read_csv(filepath_or_buffer='tmp/dd.csv', index_col='datetime')

  dd2 = pd.read_csv('tmp/dd.csv', index_col='datetime')


## 33.4 Exporting to Excel

- We have to strip the timezone information before exporting to Excel

In [12]:
import openpyxl

In [15]:
# Drop the timezone from the datetime index, then write a one-sheet workbook
(dd
 .reset_index()
 .assign(datetime=lambda frame: frame['datetime'].dt.tz_convert(None))
 .set_index('datetime')
 .to_excel(excel_writer='tmp/dd.xlsx')
)

- We can write a spreadsheet that has multiple sheets

In [16]:
# One writer for the whole workbook; each sheet is written through it
writer = pd.ExcelWriter(path='tmp/dd2.xlsx')

In [17]:
# Same data as dd, but with the timezone removed from the datetime index
dd2 = (
    dd
    .reset_index()
    .assign(datetime=lambda frame: frame['datetime'].dt.tz_convert(None))
    .set_index('datetime')
)

In [18]:
# Rows from 2010 go on a sheet named '2010'
(dd2
 .loc[slice('2010', '2010-12-31')]
 .to_excel(excel_writer=writer, sheet_name='2010')
)

In [19]:
# Rows from 2011 go on a sheet named '2011'
(dd2
 .loc[slice('2011', '2011-12-31')]
 .to_excel(excel_writer=writer, sheet_name='2011')
)

In [20]:
# Finalize the workbook: .close() writes the sheets to disk and releases the
# file. ExcelWriter.save() was deprecated and has been removed from current
# pandas releases, so .close() is the supported call.
writer.close()

  writer.save()


## 33.6 SQL

- We can stick a data frame into a SQL table with the ``.to_sql`` method.

In [23]:
import sqlite3

# Write the frame to a table named 'dd' in a local SQLite file,
# replacing the table if it already exists
con = sqlite3.connect('dd.db')
dd.to_sql(name='dd', con=con, if_exists='replace')

539305

- Read from database

In [24]:
import sqlalchemy as sa 

# Open a SQLAlchemy connection to the same SQLite file
eng = sa.create_engine('sqlite:///dd.db')
sa_con = eng.connect()

# Read the 'dd' table back, using its datetime column as the index
dd2 = pd.read_sql(sql='dd', con=sa_con, index_col='datetime')

- Our timezone-aware index was stored with its timezone data, but when the data was read back from the database, this information was dropped

In [26]:
# Compare the frame read back from the database with the original
dd2.equals(other=dd)

False

In [28]:
# Put timezone information back on the index, then compare with the original
(dd2  # doctest: +SKIP
 .reset_index()
 .assign(datetime=lambda frame: frame['datetime']
       .dt.tz_localize('America/Denver', ambiguous=False))
 .set_index('datetime')
 .equals(other=dd)
)


False

In [29]:
# Convert the frame to a nested dictionary
obj = dd.to_dict(orient='dict')

In [30]:
# Rebuild a frame from the dictionary and check it matches the original
dd2 = pd.DataFrame.from_dict(data=obj)
dd.equals(other=dd2)

True