In [45]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error


In [4]:
records_1 = pd.read_csv("request_logs.csv")
records_2 = pd.read_csv("request_logs1.csv")
merged = pd.read_csv("merged_request_logs.csv")

In [5]:
merged.head(-5)

Unnamed: 0,name,username,email,response_time,request
0,oNIHdciiaq,xHOKYveSFn,POZmQNFocE@test.com,0.078,1
1,jH15OWvQAT,64nIG1tbS9,iJVKV8ARo6@test.com,0.009,2
2,ItfhzrV0ta,R1G71cqJZ6,0LuCO2PwKE@test.com,0.008,3
3,Tg0HOMLviC,Ewta1LxVb9,ZZY6AUZYMK@test.com,0.006,4
4,gOxCBeXcX4,PhoM6em4mJ,g8knXVNe7Y@test.com,0.007,5
...,...,...,...,...,...
199990,aNOrwls4xp,n7rnxieW4Y,NbAp1KA52c@test.com,0.003,199991
199991,OUqcybqd0N,uSUmnGIJes,XK4kFv9Qpi@test.com,0.004,199992
199992,At6LFVwK3P,VhYPxTSBIV,951FZV9LCX@test.com,0.004,199993
199993,2KI3Pa9jo8,oB3j4DY2DK,yMMLteQjM9@test.com,0.005,199994


In [13]:
# merging with ID number
# DO NOT RUN THIS CELL
# not required anymore!
merged['request'] = range(1, len(merged) + 1)
merged.to_csv("merged_request_logs.csv", index=False)
# not required anymore!

In [25]:
plt.figure(figsize=(10, 6))
sns.barplot(x='request', y='response_time', data=merged, palette='viridis')
plt.title('Response Time by Request')
plt.xlabel('Request')
plt.ylabel('Response Time')
plt.show()

In [None]:
heatmap_data = merged.pivot('request', 'name', 'response_time')
plt.figure(figsize=(10, 6))
sns.heatmap(heatmap_data, cmap='coolwarm', annot=True, fmt=".3f")
plt.title('Response Time Heatmap')
plt.xlabel('Name')
plt.ylabel('Request')
plt.show()

In [7]:
merged.dtypes

name              object
username          object
email             object
response_time    float64
request            int64
dtype: object

In [21]:
X = merged.drop(columns=['name', 'username', 'email', 'response_time'])
y = merged['response_time']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
# Linear Regression model
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, y_train)

predictions = linear_reg_model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.1008832568841245e-05


In [24]:
# Decision Tree model
decision_tree_model = DecisionTreeRegressor()
decision_tree_model.fit(X_train, y_train)

decision_tree_predictions = decision_tree_model.predict(X_test)

decision_tree_mse = mean_squared_error(y_test, decision_tree_predictions)
print("Decision Tree Mean Squared Error:", decision_tree_mse)


Decision Tree Mean Squared Error: 1.9755225000000002e-05


In [25]:
# Random Forest Model
random_forest_model = RandomForestRegressor()
random_forest_model.fit(X_train, y_train)

random_forest_predictions = random_forest_model.predict(X_test)

random_forest_mse = mean_squared_error(y_test, random_forest_predictions)
print("Random Forest Mean Squared Error:", random_forest_mse)

Random Forest Mean Squared Error: 1.4065313519999994e-05


In [56]:
# Predicting the processing time of 200005th request
request_value = 2000005
linearModel = []

for i in range(request_value, request_value+10):
    request_value_2d = np.array([i]).reshape(-1, 1)
    predicted_response_time = linear_reg_model.predict(request_value_2d)
    linearModel.append(predicted_response_time)
    print(f"Predicted Response Time for the {i} request:", predicted_response_time)


Predicted Response Time for the 2000005 request: [0.00897008]
Predicted Response Time for the 2000006 request: [0.00897009]
Predicted Response Time for the 2000007 request: [0.00897009]
Predicted Response Time for the 2000008 request: [0.00897009]
Predicted Response Time for the 2000009 request: [0.00897009]
Predicted Response Time for the 2000010 request: [0.00897009]
Predicted Response Time for the 2000011 request: [0.0089701]
Predicted Response Time for the 2000012 request: [0.0089701]
Predicted Response Time for the 2000013 request: [0.0089701]
Predicted Response Time for the 2000014 request: [0.0089701]




In [55]:
# Predicting the processing time of 200005th request with random forest
request_value = 2000005
randomForest = []

for i in range(request_value, request_value+10):
    request_value_2d = np.array([i]).reshape(-1, 1)
    predicted_response_time = random_forest_model.predict(request_value_2d)
    randomForest.append(predicted_response_time)
    print(f"Predicted Response Time for the {i} request:", predicted_response_time)

Predicted Response Time for the 2000005 request: [0.00416]
Predicted Response Time for the 2000006 request: [0.00416]
Predicted Response Time for the 2000007 request: [0.00416]
Predicted Response Time for the 2000008 request: [0.00416]
Predicted Response Time for the 2000009 request: [0.00416]
Predicted Response Time for the 2000010 request: [0.00416]
Predicted Response Time for the 2000011 request: [0.00416]
Predicted Response Time for the 2000012 request: [0.00416]
Predicted Response Time for the 2000013 request: [0.00416]
Predicted Response Time for the 2000014 request: [0.00416]




In [71]:
# Predicting the processing time of 200005th request with random forest
request_value = 2000005
request_points_decisionTree = [1,100,1000,10000,100000,1000000,10000000, 100000000, 1000000000, 10000000000]
decisionTree = []

for i in range(request_value, request_value+10):
    request_value_2d = np.array([i]).reshape(-1, 1)
    predicted_response_time = decision_tree_model.predict(request_value_2d)
    print(f"Predicted Response Time for the {i} request:", predicted_response_time)

for i in request_points_decisionTree:
    request_value_2d = np.array([i]).reshape(-1, 1)
    predicted_response_time = decision_tree_model.predict(request_value_2d)
    decisionTree.append(predicted_response_time)

Predicted Response Time for the 2000005 request: [0.004]
Predicted Response Time for the 2000006 request: [0.004]
Predicted Response Time for the 2000007 request: [0.004]
Predicted Response Time for the 2000008 request: [0.004]
Predicted Response Time for the 2000009 request: [0.004]
Predicted Response Time for the 2000010 request: [0.004]
Predicted Response Time for the 2000011 request: [0.004]
Predicted Response Time for the 2000012 request: [0.004]
Predicted Response Time for the 2000013 request: [0.004]
Predicted Response Time for the 2000014 request: [0.004]




In [75]:
# Define your data
a = ['decisionTree', 'LinearRegression', 'RandomForest']
b = [ decisionTree, LinearRegression, randomForest ]

print(x,y)

[array([0.078]), array([0.006]), array([0.006]), array([0.006]), array([0.005]), array([0.004]), array([0.004]), array([0.004]), array([0.004]), array([0.004])] [1, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000]
