In [1]:
import pandas as pd
import pprint as pp

Read data into a Pandas dataframe.

In [3]:
data = pd.read_csv('history_data.csv')

Convert "characterization of blueprints" data into numbers from 0 to 2 corresponding to how detailed the blueprints are. This is the only cardinal attribute.

In [5]:
data['Characterization of Blueprints'] = data['Characterization of Blueprints'].astype('category')
data['Characterization of Blueprints'] = data['Characterization of Blueprints'].cat.reorder_categories(['Hastily Sketched', 'Deceptively Ordinary', 'Obsessively Detailed'], ordered=True)
data['Characterization of Blueprints'] = data['Characterization of Blueprints'].cat.codes

Convert the rest of the data into "dummy variables" so we can perform regression on it.

In [9]:
dummyData = pd.get_dummies(data, prefix=['Background of Architect', 'Structure Type', 'Required Construction Materials', 'Is Completed Structure Impossible?'], drop_first=True)
dummyData.head()

In [11]:
X = data[['Background of Architect',
               'Structure Type',
               'Required Construction Materials',
               'Characterization of Blueprints']]
Y = data['Is Completed Structure Impossible?']

In [12]:
X = pd.get_dummies(data=X, drop_first=True)
Y = pd.get_dummies(data=X, drop_first=True)

Perform the regression analysis.

In [13]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = .20, random_state = 42)

regr = linear_model.LinearRegression() # Do not use fit_intercept = False if you have removed 1 column after dummy encoding
regr.fit(X_train, Y_train)
predicted = regr.predict(X_test)

In [15]:
pp.pprint(predicted)

array([[ 2.00000000e+00, -1.65678666e-16,  4.72175286e-16, ...,
        -1.51148168e-16, -1.12873307e-16, -5.87831102e-17],
       [ 2.00000000e+00,  2.59839943e-16,  9.51162877e-16, ...,
        -2.52145946e-16, -2.49088597e-16, -3.04989722e-16],
       [ 1.00000000e+00, -4.68244087e-17,  2.58466796e-16, ...,
        -2.62963106e-16,  9.33403943e-18,  6.45354062e-17],
       ...,
       [ 2.83889751e-15,  4.39583339e-16, -5.31351902e-16, ...,
        -2.02333295e-16, -2.38194415e-17, -2.31000002e-16],
       [ 2.00000000e+00,  2.39571712e-16,  2.59416046e-16, ...,
         2.57073109e-17, -2.42478450e-16,  1.20840851e-16],
       [ 2.00000000e+00, -3.82255512e-16,  4.87697297e-16, ...,
         1.00000000e+00, -5.32514733e-16, -1.97010813e-16]])
