In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
# load the filtered Elon/Tesla stock CSV into a dataframe
data_path = "../etl_data/filtered_elon_tesla_stock_data.csv"
elon_tesla_data = pd.read_csv(data_path)

def get_ordinal_sentiment_type(sentiment_type):
    """Encode a sentiment label as an ordinal number.

    The sentiment types have a clear order, so they are represented as
    ordinal values: -1 for negative, 0 for neutral and 1 for positive.

    Parameters
    ----------
    sentiment_type : label to encode; compared against the exact strings
        "negative" and "neutral".

    Returns
    -------
    int : -1 for "negative", 0 for "neutral", and 1 for every other value
        (the fall-through case is what encodes "positive").
    """
    if sentiment_type == "negative":
        return -1
    # anything that is not one of the two labels above is treated as positive
    return 0 if sentiment_type == "neutral" else 1
    
# swap the text labels in the 'sentiment type' column for their ordinal codes
sentiment_labels = elon_tesla_data['sentiment type']
elon_tesla_data['sentiment type'] = sentiment_labels.apply(get_ordinal_sentiment_type)

In [3]:

# create the x and y for the model:
# three tweet-level features are used to predict the 'change_percent' column
feature_columns = ['reply count', 'sentiment type', 'sentiment score']
X = elon_tesla_data[feature_columns]
y = elon_tesla_data['change_percent']
# split the data into training and validation sets; random_state is fixed so
# the same rows land in each set on every run (without it the split is
# reshuffled each time and the reported scores cannot be reproduced)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=1)

In [4]:
# create a model using the random forest regressor:
# features are standardized, then fed to a forest of 200 trees whose size is
# constrained by max_depth and min_samples_leaf
model = make_pipeline(
    StandardScaler(),
    RandomForestRegressor(
        n_estimators=200,
        max_depth=5,
        min_samples_leaf=20,
        random_state=1,  # fixed seed so the forest is built identically on every run
    ),
)

model.fit(X_train, y_train)

# report the pipeline's score on the data it was fitted on and on the held-out data
print("Training score: ", model.score(X_train, y_train))
print("Validation score: ", model.score(X_valid, y_valid))

Training score:  0.0793172306819857
Validation score:  -0.019627985412903337


In [5]:
# regression model based on k-nearest neighbours (k = 25) over standardized features
scaler = StandardScaler()
knn = KNeighborsRegressor(n_neighbors=25)
model = make_pipeline(scaler, knn)
model.fit(X_train, y_train)

# score the fitted pipeline on the training split first, then the validation split
splits = (
    ("Training score: ", X_train, y_train),
    ("Validation score: ", X_valid, y_valid),
)
for label, features, target in splits:
    print(label, model.score(features, target))


Training score:  0.032523928811462155
Validation score:  -0.04098961123446143


In [6]:
# creating a regression model using a voting regressor:
# features are standardized once, then passed to an ensemble that combines
# a k-nearest-neighbours, a random forest and a gradient boosting regressor
model = make_pipeline(
    StandardScaler(),
    VotingRegressor(estimators=[
        ('Kn', KNeighborsRegressor(n_neighbors=50)),
        # random_state matches the gradient boosting member so that both
        # randomized estimators are reproducible from run to run
        ('Rf', RandomForestRegressor(
            n_estimators=100,
            max_depth=5,
            min_samples_leaf=20,
            random_state=1,
        )),
        ('Gr', GradientBoostingRegressor(random_state=1)),
    ]),
)

model.fit(X_train, y_train)

# test the score on the training data and on the held-out validation data
print("Training score: ", model.score(X_train, y_train))
print("Validation score: ", model.score(X_valid, y_valid))

Training score:  0.16593253676713993
Validation score:  -0.008808858477606929
