In [33]:
import numpy as np 
import math
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
from mpl_toolkits.mplot3d import axes3d
%matplotlib widget
plt.rcParams['figure.figsize'] = [8, 8]


# Load the spending table; the first CSV row supplies the column names.
fileName = 'cabinet_member_spending.csv'
df = pd.read_csv(fileName,header=0)

# Columns are picked by position with .iloc. Integer keys on a label-indexed
# row (row[7]) only work through a positional fallback that pandas has
# deprecated, so the positions are made explicit here.
# NOTE(review): positions 3, 6 and 7 are carried over unchanged from the
# previous row-wise loop; presumably an approval percentage and two spending
# amounts -- confirm against the CSV header.
approvalColumn = df.iloc[:, 3]
otherSpendColumn = df.iloc[:, 6]
foreignSpendColumn = df.iloc[:, 7]

# Share of (column 6 + column 7) that comes from column 7. Computed
# column-wise; a row whose two columns sum to zero yields NaN/inf.
percentAsForeign = (foreignSpendColumn / (otherSpendColumn + foreignSpendColumn)).tolist()

# Approval is rescaled from a percentage to a 0-1 fraction; rows with a
# missing approval value fall back to 0.9.
percentApproval = (approvalColumn / 100).fillna(.9).tolist()

df['percentAsForeign'] = percentAsForeign
df['percentApproval'] = percentApproval

# Fit one regression over every row of df and plot its prediction surface.
# NOTE(review): trainModel and makePlot are defined in cells further down this
# notebook, so on a fresh kernel those cells have to be run before this one.
overallRegression,trainList,trainAnswers,testLabels,testAnswers = trainModel(df)
makePlot(overallRegression,trainList,trainAnswers,testLabels,testAnswers,"Regression based on days in office and approval rate")


# Split the rows into "Barack Obama" and "everyone else". Each subset gets a
# fresh 0..n-1 index because trainModel chooses its test rows with
# `df.index % 4`; without the reset the split would follow the row numbers of
# the full table instead of the subset. reset_index is chained (returning a new
# frame) rather than applied in place to the filtered result.
demDF = df[df['President'] == 'Barack Obama'].reset_index(drop=True)
repDF = df[df['President'] != 'Barack Obama'].reset_index(drop=True)

# Train and plot one model per subset. The train/test variables are reused, so
# after this cell they hold the values from the last (republican) fit.
demReg,trainList,trainAnswers,testLabels,testAnswers = trainModel(demDF)
makePlot(demReg,trainList,trainAnswers,testLabels,testAnswers,
         "Regression based on days in office and approval rate for democrats")
repReg,trainList,trainAnswers,testLabels,testAnswers = trainModel(repDF)
makePlot(repReg,trainList,trainAnswers,testLabels,testAnswers,
         "Regression based on days in office and approval rate for republicans")


# approvalRateRange = [x/100 for x in range(50,100)]
# dateRange = [3500*x/50 for x in range(0,50)]
# combinedLabelsToPredict = [[approval,date] for approval,date in zip(approvalRateRange,dateRange)]
# plotPrediction = reg.predict(combinedLabelsToPredict)
# fig = plt.figure()
# ax = fig.add_subplot(111, projection='3d')

# approvalRateRange = dateRange  = np.arange(-1,1,.05)
# approvalRateRangeMesh, dateRangeMesh = np.meshgrid(approvalRateRange, dateRange)
# dateRangeMesh = dateRangeMesh * 3500
# ax.plot_wireframe(approvalRateRangeMesh,dateRangeMesh,np.array([plotPrediction,plotPrediction]))
# ax.scatter([approvalRate[0] for approvalRate in trainList],[days[0] for days in trainList],[])
# plt.show

Mean squared error =  9.557146395470897e-05


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Mean squared error =  3.516729947489643e-05


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Mean squared error =  3.994305524719425e-05


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
def dfToLabels(df):
    """Turn a DataFrame into regression features and targets.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'percentApproval', 'Days' and
        'percentAsForeign'.

    Returns
    -------
    trainList : list of [percentApproval, Days] pairs, one per row, in row order.
    answerLabelList : list of the matching 'percentAsForeign' values.
    """
    # Read the columns directly instead of iterating rows: iterrows builds a
    # Series per row (slow) and coerces every value in the row to one dtype.
    trainList = [
        [approval, days]
        for approval, days in zip(df['percentApproval'], df['Days'])
    ]
    answerLabelList = df['percentAsForeign'].tolist()
    return trainList,answerLabelList

In [32]:
def makePlot(reg,trainList,trainAnswers,testLabels,testAnswers,title):
    """Show the fitted regression as a 3-D wireframe with the data points on top.

    Parameters
    ----------
    reg : fitted estimator
        Must expose ``predict(X)`` for two-column ``X`` ordered
        [approval rate, days].
    trainList, testLabels : sequence of [approval rate, days] pairs
        Feature rows used for training and testing.
    trainAnswers, testAnswers : sequence of numbers
        Target values matching ``trainList`` / ``testLabels``.
    title : str
        Figure title.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    gridSize = 200
    dateLinSpace = np.linspace(200,3500,gridSize)
    approvalLinSpace = np.linspace(.5,1,gridSize)
    x,y = np.meshgrid(approvalLinSpace,dateLinSpace)

    # Predict the whole grid in one call. Calling predict once per grid point
    # meant 40,000 separate calls and stored a 1-element array into a scalar
    # slot of z, a conversion NumPy has deprecated.
    gridFeatures = np.column_stack([x.ravel(), y.ravel()])
    z = np.asarray(reg.predict(gridFeatures)).reshape(x.shape)

    fig = plt.figure()
    # add_subplot replaces fig.gca(projection='3d'): newer Matplotlib releases
    # no longer accept keyword arguments on gca().
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_wireframe(x,y,z)
    ax.set_xlabel('Approval rate')
    ax.set_ylabel('Days in position')

    # Each feature row is [approval rate, days]; split it into the two axes.
    ax.scatter([pair[0] for pair in testLabels],
               [pair[1] for pair in testLabels],
               testAnswers, c = 'yellow')
    ax.scatter([pair[0] for pair in trainList],
               [pair[1] for pair in trainList],
               trainAnswers, c = 'purple')

    # Proxy patches stand in for the scatter and wireframe artists in the legend.
    yellow_patch = mpatches.Patch(color='yellow', label='Test Data')
    purple_patch = mpatches.Patch(color='purple', label='Training data')
    blue_patch = mpatches.Patch(color = 'blue', label = 'Predicted')
    ax.legend(handles=[yellow_patch,purple_patch,blue_patch])
    ax.set_title(title)
    plt.show()

In [25]:
def trainModel(df):
    """Fit a linear regression on three quarters of ``df`` and report test error.

    Rows whose index label is a multiple of 4 form the test set and all other
    rows form the training set. The split uses ``df.index``, so callers should
    pass a frame with a 0..n-1 index (e.g. after ``reset_index(drop=True)``).

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``dfToLabels``, which reads the feature and target columns.

    Returns
    -------
    tuple
        ``(reg, trainList, trainAnswers, testLabels, testAnswers)``: the fitted
        ``LinearRegression`` plus the train and test features/targets.

    Side effects
    ------------
    Prints the mean squared error on the test rows.
    """
    isTestRow = df.index % 4 == 0
    trainingDf = df[~isTestRow]
    testDf = df[isTestRow]
    trainList, trainAnswers = dfToLabels(trainingDf)
    testLabels, testAnswers = dfToLabels(testDf)

    reg = linear_model.LinearRegression()
    reg.fit(trainList,trainAnswers)

    prediction = reg.predict(testLabels)
    # mean_squared_error is documented as (y_true, y_pred); the value is the
    # same either way, but the arguments are passed in the documented order.
    print('Mean squared error = ', mean_squared_error(testAnswers,prediction))
    return reg,trainList,trainAnswers,testLabels,testAnswers