In [1]:
# Import dependencies
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

In [2]:
# Revenue CSV file for each automaker (paths are relative to the notebook)
ford_csv, gm_csv = "ford_revenue.csv", "gm_revenue.csv"
honda_csv, toyota_csv = "honda_revenue.csv", "toyota_revenue.csv"

In [3]:
# Load each automaker's CSV into its own DataFrame (same order as the paths)
ford_df, gm_df, honda_df, toyota_df = (
    pd.read_csv(csv_path)
    for csv_path in (ford_csv, gm_csv, honda_csv, toyota_csv)
)

In [4]:
# Preview the first rows of the Ford data
ford_df.head(n=5)

Unnamed: 0,Ford Total Revenue,Dates,Last Price
0,0,2018-09-30,37666000000
1,1,2018-06-30,38920000000
2,2,2018-03-31,41959000000
3,3,2017-12-31,41326000000


In [5]:
# Join Ford and GM on their shared "Dates" values (pandas' default inner join,
# so only dates present in both frames are kept). The overlapping "Last Price"
# columns receive the default _x (Ford) / _y (GM) suffixes.
revenue_df1 = ford_df.merge(gm_df, on="Dates")
revenue_df1.head(n=5)

Unnamed: 0,Ford Total Revenue,Dates,Last Price_x,GM Total Revenue,Last Price_y
0,1,2018-06-30,38920000000,0,36760000000
1,2,2018-03-31,41959000000,1,36099000000
2,3,2017-12-31,41326000000,2,37715000000


In [6]:
# Inspect the column labels and their dtypes. The dtypes Series is indexed by
# column label, so it shows both at once; a separate bare `.columns` expression
# earlier in the cell would never be rendered (only the last expression is).
revenue_df1.dtypes

Ford Total Revenue     int64
Dates                 object
Last Price_x           int64
GM Total Revenue       int64
Last Price_y           int64
dtype: object

In [7]:
# Keep only the date and the two price columns from the Ford/GM merge.
# Selecting with a list of labels raises KeyError when a column is missing,
# whereas pd.DataFrame(df, columns=[...]) would silently add an all-NaN column
# for a misspelled name. .copy() makes the result independent of revenue_df1.
items_pd = revenue_df1[["Dates", "Last Price_x", "Last Price_y"]].copy()
items_pd.head()

Unnamed: 0,Dates,Last Price_x,Last Price_y
0,2018-06-30,38920000000,36760000000
1,2018-03-31,41959000000,36099000000
2,2017-12-31,41326000000,37715000000


In [8]:
# Replace the merge suffixes with company-specific column names
revenue_df1_temp = items_pd.rename(
    {
        "Dates": "Dates",
        "Last Price_x": "Ford total revenue",
        "Last Price_y": "GM total revenue",
    },
    axis="columns",
)
revenue_df1_temp.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue
0,2018-06-30,38920000000,36760000000
1,2018-03-31,41959000000,36099000000
2,2017-12-31,41326000000,37715000000


In [9]:
# Join the Ford/GM frame with the Honda data on their shared "Dates" values
# (pandas' default inner join)
revenue_df2 = revenue_df1_temp.merge(honda_df, on="Dates")
revenue_df2.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda Total Revenue,Last Price
0,2018-06-30,38920000000,36760000000,0,36337920990
1,2018-03-31,41959000000,36099000000,1,36833675752
2,2017-12-31,41326000000,37715000000,2,35119466625


In [10]:
# Keep the date, the Ford/GM revenue columns and Honda's "Last Price" column.
# Selecting with a list of labels raises KeyError when a column is missing,
# whereas pd.DataFrame(df, columns=[...]) would silently add an all-NaN column
# for a misspelled name. .copy() makes the result independent of revenue_df2.
items_pd_2 = revenue_df2[
    ["Dates", "Ford total revenue", "GM total revenue", "Last Price"]
].copy()
items_pd_2.head()

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Last Price
0,2018-06-30,38920000000,36760000000,36337920990
1,2018-03-31,41959000000,36099000000,36833675752
2,2017-12-31,41326000000,37715000000,35119466625


In [11]:
# Label Honda's "Last Price" column with the company name
revenue_df2_temp = items_pd_2.rename(
    {"Last Price": "Honda total revenue"},
    axis="columns",
)
revenue_df2_temp.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda total revenue
0,2018-06-30,38920000000,36760000000,36337920990
1,2018-03-31,41959000000,36099000000,36833675752
2,2017-12-31,41326000000,37715000000,35119466625


In [12]:
# Join the Ford/GM/Honda frame with the Toyota data on their shared "Dates"
# values (pandas' default inner join)
revenue_df3 = revenue_df2_temp.merge(toyota_df, on="Dates")
revenue_df3.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda total revenue,Toyota Total Revenue,Last Price
0,2018-06-30,38920000000,36760000000,36337920990,0,66485478990
1,2018-03-31,41959000000,36099000000,36833675752,1,71344081224
2,2017-12-31,41326000000,37715000000,35119466625,2,67501182125


In [13]:
# Keep the date, the three named revenue columns and Toyota's "Last Price".
# Selecting with a list of labels raises KeyError when a column is missing,
# whereas pd.DataFrame(df, columns=[...]) would silently add an all-NaN column
# for a misspelled name. .copy() makes the result independent of revenue_df3.
items_pd_3 = revenue_df3[
    [
        "Dates",
        "Ford total revenue",
        "GM total revenue",
        "Honda total revenue",
        "Last Price",
    ]
].copy()
items_pd_3.head()

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda total revenue,Last Price
0,2018-06-30,38920000000,36760000000,36337920990,66485478990
1,2018-03-31,41959000000,36099000000,36833675752,71344081224
2,2017-12-31,41326000000,37715000000,35119466625,67501182125


In [14]:
# Label Toyota's "Last Price" column with the company name
revenue_df3_temp = items_pd_3.rename(
    {"Last Price": "Toyota total revenue"},
    axis="columns",
)
revenue_df3_temp.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda total revenue,Toyota total revenue
0,2018-06-30,38920000000,36760000000,36337920990,66485478990
1,2018-03-31,41959000000,36099000000,36833675752,71344081224
2,2017-12-31,41326000000,37715000000,35119466625,67501182125


In [15]:
# Parse every 'Dates' string (YYYY-MM-DD) into a datetime object
new_date = [
    dt.datetime.strptime(date_text, '%Y-%m-%d')
    for date_text in revenue_df3_temp['Dates']
]
print(new_date)

[datetime.datetime(2018, 6, 30, 0, 0), datetime.datetime(2018, 3, 31, 0, 0), datetime.datetime(2017, 12, 31, 0, 0)]


In [16]:
# Attach the parsed dates to the DataFrame as a 'New Date' column.
# The raw array is assigned (not the Series itself) so the values are placed
# positionally instead of being aligned on the index.
se = pd.Series(data=new_date)
revenue_df3_temp['New Date'] = se.to_numpy()
revenue_df3_temp.head(n=5)

Unnamed: 0,Dates,Ford total revenue,GM total revenue,Honda total revenue,Toyota total revenue,New Date
0,2018-06-30,38920000000,36760000000,36337920990,66485478990,2018-06-30
1,2018-03-31,41959000000,36099000000,36833675752,71344081224,2018-03-31
2,2017-12-31,41326000000,37715000000,35119466625,67501182125,2017-12-31


In [18]:
# Quick plot overview of total revenue for all four automakers
fig, ax1 = plt.subplots()

# Plot against the parsed 'New Date' column rather than the raw 'Dates'
# strings: matplotlib treats strings as categories, which would space the
# points evenly in row order (newest first) instead of on a real time axis.
for company in ("Ford", "GM", "Toyota", "Honda"):
    ax1.plot(
        revenue_df3_temp['New Date'],
        revenue_df3_temp[company + " total revenue"],
        label=company + " Total Revenue",
    )

ax1.set_title("Total Revenue by Automaker")
ax1.set_xlabel("Date")
ax1.set_ylabel("Total revenue")
# One legend call after all lines are drawn picks up every label
ax1.legend(loc="upper right")

# Rotate and right-align the date tick labels so they do not overlap
fig.autofmt_xdate()

<IPython.core.display.Javascript object>