In [1]:
import pandas as pd

We saw in our last chapter that we have and army of methods to load data into a Pandas DataFrame. 

In this chapter we are going to talk about methods provided by Pandas to write data from a DataFrame onto a different Data Source/ Data Target.

## Let`s beggin

In [2]:
## we imported Pandas 
## lets load one of the file we previously used to Load Data into a DataFrame 

my_df = pd.read_csv('DataSources\my_csv.csv')

my_df

## Ok - all is good now, we have a DataFrame and let`s go thru some examples on how we can write data out of a DataFrame


Unnamed: 0,Name,Age
0,Adrian,33
1,Julia,21
2,Julia,22


## Write Data in CSV format

In [3]:
my_df.to_csv('DataOut\my_exported_csv.csv')

## Ok - we have the DataFrame Exported into a csv format file 

## Lets look at it ! 

print(open('DataOut\csv\my_exported_csv.csv').read())

## You can see we the index added as well to the file
## let`s get rid of it 

my_df.to_csv('DataOut\csv\my_exported_csv.csv',index=False)

## Lets look at it again 

print(open('DataOut\csv\my_exported_csv.csv').read())

## Looks great - its clean

,Name,Age
0,Adrian,33
1,Julia,21

Name,Age
Adrian,33
Julia,21
Julia,22



## Write Data in CSV format and Zipped

In [4]:
## Ok this is a nice feature 
## for this the parameter compression that will hold a dict object
## in this case we will call it compression_option and give it the method & archive_name

compression_option = dict(method='zip', archive_name='my_exported_csv.csv')

my_df.to_csv('DataOut\csv\my_exported_csv.zip',index=False, compression=compression_option)

# Great ! 

## Write Data in CSV format and with custom field delimiter

In [5]:
my_df.to_csv('DataOut\csv\my_exported_csv.csv')

my_df.to_csv('DataOut\csv\my_exported_csv_custom.csv',index=False,sep='@')

print(open('DataOut\csv\my_exported_csv_custom.csv').read())

Name@Age
Adrian@33
Julia@21
Julia@22



## Write Data in JSON format

In [6]:
## we have our DF loaded 
## writing data into json format is as easy as it gets in pandas and take a guess whats the name ! Correct to_json()
my_df

my_df.to_json('DataOut\my_exported_json.json')

## read the file 
print(open('DataOut\my_exported_json.json').read())

## Again this index !! 
## Now in to_json() we have parameter called orient and this is a very powerfull parameter, he dictates the way the json 
## output will be constructed
## Lets see it in action ! 



{"Name":{"0":"Adrian","1":"Julia","2":"Julia"},"Age":{"0":33,"1":21,"2":22}}



#### Available orinet options are:
- split
- records
- index
- columns
- values 
- table

Depending on you use case you will choose the right one for you! 



In [7]:
## We will start with the default orient, which is 'columns', when we export to_json() from a dataframe

my_df.to_json('DataOut\json\my_exported_json_columns.json')

## read the file 
print(open('DataOut\json\my_exported_json_columns.json').read())

# {
#     "Name": {
#         "0": "Adrian",
#         "1": "Julia"
#     },
#     "Age": {
#         "0": 33,
#         "1": 21
#     }
# }

{"Name":{"0":"Adrian","1":"Julia","2":"Julia"},"Age":{"0":33,"1":21,"2":22}}


In [8]:
## split

my_df.to_json('DataOut\json\my_exported_json_split.json', orient='split')

## read the file 
print(open('DataOut\json\my_exported_json_split.json').read())

# {
#     "columns": [
#         "Name",
#         "Age"
#     ],
#     "index": [
#         0,
#         1
#     ],
#     "data": [
#         [
#             "Adrian",
#             33
#         ],
#         [
#             "Julia",
#             21
#         ]
#     ]
# }

{"columns":["Name","Age"],"index":[0,1,2],"data":[["Adrian",33],["Julia",21],["Julia",22]]}


In [9]:
## records - this might be the most used one

my_df.to_json('DataOut\json\my_exported_json_records.json', orient='records')

## read the file 
print(open('DataOut\json\my_exported_json_records.json').read())

# [
#     {
#         "Name": "Adrian",
#         "Age": 33
#     },
#     {
#         "Name": "Julia",
#         "Age": 21
#     }
# ]

[{"Name":"Adrian","Age":33},{"Name":"Julia","Age":21},{"Name":"Julia","Age":22}]


In [10]:
## index - the index value will be the root of the element

my_df.to_json('DataOut\json\my_exported_json_index.json', orient='index')

## read the file 
print(open('DataOut\json\my_exported_json_index.json').read())

# {
#     "0": {
#         "Name": "Adrian",
#         "Age": 33
#     },
#     "1": {
#         "Name": "Julia",
#         "Age": 21
#     }
# }

{"0":{"Name":"Adrian","Age":33},"1":{"Name":"Julia","Age":21},"2":{"Name":"Julia","Age":22}}


In [11]:
## values - very compact, if you don`t need the column - this is the one to go for 
my_df.to_json('DataOut\json\my_exported_json_values.json', orient='values')

## read the file 
print(open('DataOut\json\my_exported_json_values.json').read())

# [
#     [
#         "Adrian",
#         33
#     ],
#     [
#         "Julia",
#         21
#     ]
# ]


[["Adrian",33],["Julia",21],["Julia",22]]


In [12]:
## table - maybe the most verbose of all, but very handy in some cases.

my_df.to_json('DataOut\json\my_exported_json_table.json', orient='table')

## read the file 
print(open('DataOut\json\my_exported_json_table.json').read())


# {
#     "schema": {
#         "fields": [
#             {
#                 "name": "index",
#                 "type": "integer"
#             },
#             {
#                 "name": "Name",
#                 "type": "string"
#             },
#             {
#                 "name": "Age",
#                 "type": "integer"
#             }
#         ],
#         "primaryKey": [
#             "index"
#         ],
#         "pandas_version": "0.20.0"
#     },
#     "data": [
#         {
#             "index": 0,
#             "Name": "Adrian",
#             "Age": 33
#         },
#         {
#             "index": 1,
#             "Name": "Julia",
#             "Age": 21
#         }
#     ]
# }

{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"Name","type":"string"},{"name":"Age","type":"integer"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":0,"Name":"Adrian","Age":33},{"index":1,"Name":"Julia","Age":21},{"index":2,"Name":"Julia","Age":22}]}



#### Ok, we saw all the available orient optins and how the look, what about the other parameters ? 



In [13]:
## We can also compress a json file using to_json() method

compression_option = dict(method='zip', archive_name='my_exported_compressed.json')

my_df.to_json('DataOut\json\my_exported_compressed.zip', orient='table', compression=compression_option)

In [14]:

## we can also play with the indentation defintion 
## we will the default orientation and we will give it a ridiculos value od 33

my_df.to_json('DataOut\json\my_exported_json_indent.json', indent=33)

## yeah it looks not to good , but this just shows that we can control this aspect as well ! 

# {
#                                  "Name":{
#                                                                   "0":"Adrian",
#                                                                   "1":"Julia"
#                                  },
#                                  "Age":{
#                                                                   "0":33,
#                                                                   "1":21
#                                  }
# }

#### What next ? 


**to_sql()** - this method is very powerfull and allows us to interact directly with a Database.

In [16]:
## For this example we will use sqlalchemy module and create connection to a MariaDB on my localhost

import sqlalchemy
SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://root:root@localhost/pandas'
## Dont mind the very dificult password ! :)

## Create the engine
engine = sqlalchemy.create_engine(SQLALCHEMY_DATABASE_URI)

## create a connection object - you will need to close 
conn = engine.connect().connection

## the parameter name represents the table that we will use to write data to, if table does not exisit then
## the table will be created
my_df.to_sql(name='my_users', con=engine)

## commit the data
conn.commit()

## Close connection 
conn.close()
## Query the newly created Table 
engine.execute("select * from my_users").fetchall()





[(0, 'Adrian', 33), (1, 'Julia', 21), (2, 'Julia', 22)]

In [17]:


## What is the Table exists ? 
## well fo this we have the parameter if_exists that has the following options
## {'fail', 'replace', 'append'} and the default is 'fail'

## 'append' - will add new roes to the table
conn = engine.connect().connection

my_df.to_sql(name='my_users', con=engine, if_exists='append')

## commit the data
conn.commit()

## Close connection 
conn.close()
## Query the Table 
engine.execute("select * from my_users").fetchall()



[(0, 'Adrian', 33),
 (1, 'Julia', 21),
 (2, 'Julia', 22),
 (0, 'Adrian', 33),
 (1, 'Julia', 21),
 (2, 'Julia', 22)]

In [None]:
## We see that we also get the index create, what if we don`t want the index ? 

## Lets drop the table and create new one but this time without the index column 
# conn = engine.connect().connection
# engine.execute("drop table my_users")
# ## Close connection 
# conn.close()


conn = engine.connect().connection

my_df.to_sql(name='my_users', con=engine, index=False)

## commit the data
conn.commit()

## Close connection 
conn.close()
## Query the Table 
engine.execute("select * from my_users").fetchall()



In [None]:
## Great ! But we still want an index on our table, but not the default index from the DF



# # Lets drop the table and create new one but this time with an index column we choose 
# conn = engine.connect().connection
# engine.execute("drop table my_users")
# ## Close connection 
# conn.close()

## We need to recreate our DF and choose the index 

my_df = pd.read_csv('DataSources\my_csv.csv',index_col=['Name'])

conn = engine.connect().connection

my_df.to_sql(name='my_users', con=engine, index=True)

## commit the data
conn.commit()

## Close connection 
conn.close()
## Query the Table 
engine.execute("select * from my_users").fetchall()


In [None]:

## If we want to update  some data into the same table ? 
## All we have to do is use the parameter if_exists with the option 'replace' and also we need to spcecify the 
my_df = pd.read_csv('DataSources\my_csv.csv',index_col=['Name'])
conn = engine.connect().connection

my_df.to_sql(name='my_users', con=engine, index=True, if_exists='replace', index_label='Name' )

## commit the data
conn.commit()

## Close connection 
conn.close()

## Next is to_html()

In [40]:
my_df = pd.read_csv('DataSources\my_csv.csv')

# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html.html')

## open the file now ! 
## very cool 

## now lets show what else we can do with this methods

## Generate html for an ipython notebook
# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_notebook.html', notebook=True)

## Remove the bold type char from the column 
# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_bold.html',bold_rows=False)

## Dont print the columns as header
# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_noheader.html',header=False)

## Center the column Name
# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_justify.html',justify='center')

## Options for justify are :
## - left,right,center,justify,justify-all,start,end,inherit,match-parent,initial,unset


## What about more complex formatting and bla bla bla ! 
## for this we have the classes parameter, this class definition is fetched from css file so... 
## we have a css file called my_css.css and we have class called mystyle 
# my_df.to_html(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_css.html',classes='mystyle')

## Hmm strange nothing happened to the html design ?! 
## if we look in the definion of the html we can see that he is pointing to the class mystyle but there is not refference 
## to the my_css.css file, so he will do nothing

## Lets see what is techinique to add this my_css.css file 

## We will create an object called html_head, this object will wrap the output of the to_html() method
## and will have the reffernce to the my_css.css file
html_head = '''
<html>
  <head><title>HTML Pandas Dataframe with CSS</title></head>
  <link rel="stylesheet" type="text/css" href="my_css.css"/>
  <body>
    {to_html_output_table}
  </body>
</html>.
'''

# Next we write to a file the wrapped html and we replace the placeholder with the output of the to_html()
with open(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\html\to_html_css.html', 'w') as f:
    f.write(html_head.format(to_html_output_table=my_df.to_html(classes='mystyle')))
    
## View the html output! 
## Preatty cool


## Our Last one is to_excel()

This is used alot to send reports that can be open on most desktops and easy to share data. 

Lets see how it works! 

In [51]:
## we are going to use the same DF data 

my_df = pd.read_csv('DataSources\my_csv.csv')


## you probably know the name of the method already yeap is called to_excel()
## before we start we need the module openpyxl to be installed 


# my_df.to_excel(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\excel\my_excel.xlsx')

## Cool, lets go over some of the options

## Write to excell and set the name of the sheet
# my_df.to_excel(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\excel\my_excel.xlsx', sheet_name='cool_sheet')


## Append a new sheet to an existing excel
## for this task we will involve ExcelWriter() methos with mode='a' (append)
# with pd.ExcelWriter(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\excel\my_excel.xlsx', mode='a') as writer:
#     my_df.to_excel(writer, sheet_name='Sheet_name_2')
    
## We can also append multiple sheets 
# with pd.ExcelWriter(r'C:\Users\adriano\Desktop\Projects\Personal\Python Pandas\DataOut\excel\my_excel.xlsx', mode='a') as writer:
#     my_df.to_excel(writer, sheet_name='Sheet_name_3')
#     my_df.to_excel(writer, sheet_name='Sheet_name_4')
    
    
# That is iit !! 
    
