In [14]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Synthetic data set: y is a linear function of X plus Gaussian noise
np.random.seed(0)  # fixed seed so every run draws the same sample

N_SAMPLES = 100

# Predictor: standard-normal draws rescaled to stddev 2.5 and shifted to mean 1.5
X = 2.5 * np.random.randn(N_SAMPLES) + 1.5
# Residuals (stddev 0.5); drawn after X so the random stream is consumed in the same order
res = 0.5 * np.random.randn(N_SAMPLES)
# Response: intercept 2, slope 0.3, plus the residual term
y = 2 + 0.3 * X + res

# Gather predictor and response into a two-column DataFrame and display it
columns = {'X': X, 'y': y}
df = pd.DataFrame(columns)
df

Unnamed: 0,X,y
0,5.910131,4.714615
1,2.500393,2.076238
2,3.946845,2.548811
3,7.102233,4.615368
4,6.168895,3.264107
...,...,...
95,3.266433,2.894157
96,1.526250,2.843770
97,5.964676,4.201155
98,1.817280,3.626802


In [19]:
# Display the training targets.
# NOTE(review): y_train is assigned by the train_test_split cell that appears
# further down in this notebook; on a fresh kernel that cell has to run before
# this one, otherwise the name is undefined.
y_train

43    4.230597
62    2.314851
3     4.615368
71    2.706101
45    2.593684
        ...   
96    2.843770
67    2.024701
64    1.970352
47    3.591626
44    3.259333
Name: y, Length: 80, dtype: float64

In [18]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
features, target = df['X'], df['y']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=0
)

# The estimator is given a 2-D feature matrix, so the single predictor
# is reshaped into one column
X_train_2d = X_train.to_numpy().reshape(-1, 1)
X_test_2d = X_test.to_numpy().reshape(-1, 1)

# Fit the linear model on the training rows
model = LinearRegression()
model.fit(X_train_2d, y_train)

# Predict the response for the held-out rows
y_pred = model.predict(X_test_2d)

# Pair each held-out actual value with its prediction for visualization
test_pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

In [10]:
# Scatter the training points. The Series are passed directly, without a
# data_frame argument, because they are the subset produced by the split and
# no column of df is referenced by name here.
fig = px.scatter(x=X_train, y=y_train, labels={'x': 'X', 'y': 'y'},
                 color_discrete_sequence=['blue'],
                 title='Linear Regression Model')
# Give the training trace a legend entry so it can be told apart from the
# traces added below
fig.update_traces(name='Training Data', showlegend=True)

# Overlay the held-out test points
fig.add_scatter(x=X_test, y=y_test, mode='markers', marker=dict(color='red'), name='Test Data')

# Evaluate the fitted model on 100 evenly spaced X values spanning the observed range
x_range = np.linspace(df['X'].min(), df['X'].max(), 100)
y_range = model.predict(x_range.reshape(-1, 1))

# Add the regression line to the plot
fig.add_scatter(x=x_range, y=y_range, mode='lines', line=dict(color='black'), name='Regression Line')

# Show the plot
fig.show()


In [12]:
# Evaluate the fitted model on 100 evenly spaced X values spanning the observed range
x_range = np.linspace(df['X'].min(), df['X'].max(), 100)
y_range = model.predict(x_range.reshape(-1, 1))

# Start the figure from the regression line
fig = px.line(x=x_range, y=y_range, labels={'x': 'X', 'y': 'y'},
              color_discrete_sequence=['black'],
              title='Linear Regression Model')

# Overlay the held-out test points (red) and the training points (blue);
# each trace gets its own legend name so the two sets can be distinguished
fig.add_scatter(x=X_test, y=y_test, mode='markers', marker=dict(color='red'), name='Test Data')
fig.add_scatter(x=X_train, y=y_train, mode='markers', marker=dict(color='blue'), name='Training Data')

# Show the plot
fig.show()

In [13]:
# Build the chart with plotly.graph_objects, adding every trace explicitly.
# x_range / y_range are the prediction grid computed in the earlier plotting cells.
fig = go.Figure()

# Add traces for training data points
fig.add_trace(go.Scatter(x=X_train, y=y_train, mode='markers', marker=dict(color='blue'), name='Training Data'))

# Add traces for test data points
fig.add_trace(go.Scatter(x=X_test, y=y_test, mode='markers', marker=dict(color='red'), name='Test Data'))

# Add a trace for the regression line
fig.add_trace(go.Scatter(x=x_range, y=y_range, mode='lines', line=dict(color='black'), name='Regression Line'))

# Set the title and axis labels
fig.update_layout(title='Linear Regression Model', xaxis_title='X', yaxis_title='y')

# Show the plot
fig.show()

In [None]:
``