In [None]:
# Tracks whether Google Drive is currently mounted by this notebook.
mounted = False

# Detect whether the notebook runs inside Google Colab by trying to import
# the Colab-only package; `drive` is only bound when that import succeeds.
try:
    import google.colab
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not in Colab". A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    IN_COLAB = False

import pandas as pd
from sklearn.linear_model import LinearRegression

def mount_drive():
    """Mount Google Drive at /content/drive and record that it is mounted.

    Always forces a remount, then sets the module-level ``mounted`` flag
    to True.
    """
    global mounted
    mount_point = "/content/drive"
    drive.mount(mount_point, force_remount=True)
    mounted = True

def open_dataset(filepath):
    """Read a CSV file into a pandas DataFrame.

    Outside Colab, ``filepath`` is passed to ``pd.read_csv`` unchanged.
    Inside Colab, ``filepath`` is resolved inside the course folder on
    Google Drive, and the drive is mounted first if it has not been yet.
    """
    if not IN_COLAB:
        return pd.read_csv(filepath)

    drive_path = '/content/drive/My Drive/UNI/CE101/{}'.format(filepath)
    if not mounted:
        mount_drive()
    return pd.read_csv(drive_path)

train_dataset = open_dataset("train.csv")
test_dataset = open_dataset("test.csv")

# Print info about the variables contained within both datasets.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits an extra "None" line.
train_dataset.info()
test_dataset.info()

# Count the missing values per column of the test dataset and show only the
# columns that have at least one missing value.
test_missing_val = test_dataset.isnull().sum()
print('Data missing from test dataset:\n', test_missing_val[test_missing_val > 0])

# Do the same for the train dataset. `missing_val` ends up holding only the
# train columns with at least one missing value.
missing_val = train_dataset.isnull().sum()
missing_val = missing_val[missing_val > 0]
print('Data missing from train dataset:\n', missing_val)

# Bar chart of the missing-value counts in the train dataset. The Axes object
# is kept in a variable instead of being printed, so only the figure is shown.
missing_val_ax = missing_val.plot.bar(title='Missing values per column (train dataset)')

In [None]:

# Summary statistics for the columns of both datasets.
for dataset in (train_dataset, test_dataset):
    print(dataset.describe())


# The variable the model will learn to predict. By convention this is
# called y.
y_train = train_dataset.SalePrice

# Features to select from the datasets. The list is not complete yet; all
# pre-processing of the data should be finished before features are selected.
sales_features = ['BedroomAbvGr']
X_train, X_test = (frame[sales_features] for frame in (train_dataset, test_dataset))


# Basic description of the selected features, train dataset first and test
# dataset second.
train_summary = X_train.describe()
test_summary = X_test.describe()
print(train_summary, '\n', test_summary)

In [None]:
# Preview the first five rows of the selected training features.
X_train.head(5)

In [None]:
# Create the machine learning model linear regression
model = LinearRegression()


# train our model with the selected features.
model.fit(X_train, y_train)

# Predict data based on our model
final_predict = model.predict(X_test)


pd.DataFrame({'Id': test_dataset.Id, 'SalePrice': final_predict}).to_csv("output.csv", index=False)

In [None]:
# Flush pending writes and unmount Google Drive, but only when this notebook
# runs in Colab and actually mounted the drive.
if IN_COLAB and mounted:
    drive.flush_and_unmount()
    # Reset the flag so that a later open_dataset() call mounts the drive
    # again instead of reading from an unmounted path.
    mounted = False