In [10]:
import numpy as np
import scipy.stats as stats
import plotly.graph_objects as go

# --- Simulation settings for the model  y = beta0 + beta1 * x + error ---
n = 100       # sample size
beta0 = 2     # true intercept
beta1 = 1.5   # true slope
sigma = 1     # standard deviation of the normal error term

# Seed NumPy's global random state before drawing so the sample below is
# the same every time this cell is run.
np.random.seed(42)

# Predictor: n draws from Uniform(loc=0, scale=10), i.e. values in [0, 10]
x = stats.uniform.rvs(loc=0, scale=10, size=n)

# Noise: n draws from Normal(mean 0, standard deviation sigma)
errors = stats.norm.rvs(loc=0, scale=sigma, size=n)

# Outcome: the theoretical line evaluated at x, plus the noise
y = beta0 + beta1 * x + errors

# Plot the simulated sample together with the noise-free theoretical line
fig = go.Figure()

# Simulated observations, drawn as individual markers
fig.add_scatter(x=x, y=y, mode='markers', name='Data')

# A straight line only needs its two end points: evaluate the true model
# (no error term) at the smallest and largest observed x.
x_line = np.array([np.min(x), np.max(x)])
y_line = beta0 + beta1 * x_line
fig.add_scatter(
    x=x_line,
    y=y_line,
    mode='lines',
    name='True Line',
    line={'color': 'orange', 'dash': 'dot'},
)

# Title and axis labels, then render
fig.update_layout(
    title='Simple Linear Regression: True Model vs Simulated Data',
    xaxis_title='Predictor (x)',
    yaxis_title='Outcome (y)',
)
fig.show()


In [11]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.formula.api as smf
import plotly.express as px
import plotly.graph_objects as go

# Put the simulated predictor and outcome side by side as columns 'x' and 'y'
df = pd.DataFrame(dict(x=x, y=y))


In [34]:
# Specify the regression of y on x (formula "y ~ x"; columns are looked up
# in df), then estimate it by ordinary least squares.
model_data_specification = smf.ols("y ~ x", data=df)  # model specification only; nothing is estimated yet
fitted_model = model_data_specification.fit()  # fitted results object

# Ways to inspect the fitted model. Only a cell's last expression is
# displayed automatically, so in this position these lines are evaluated but
# show nothing; move one to the end of a cell (or wrap it in display(...))
# to see its value.
fitted_model.summary()  # Detailed summary of the regression results
fitted_model.summary().tables[1]  # The coefficient table from that summary
fitted_model.params  # Estimated intercept and slope
fitted_model.params.values  # The same estimates as a plain array
fitted_model.rsquared  # R-squared of the fitted model

# Scatter plot of the data. Colouring by a constant column is a hack to get
# a 'Data' entry in the legend; note that it adds a 'Data' column to df in place.
df['Data'] = 'Data'
fig = px.scatter(df, x='x', y='y', color='Data', title='y vs. x with Fitted Line')

# Fitted regression line: the model's fitted values at each observed x.
# mode='lines' is set explicitly because plotly's default mode depends on the
# number of points (it adds markers when there are fewer than 20), which
# would clutter the plot with a marker on every fitted value for small samples.
fig.add_scatter(x=df['x'], y=fitted_model.fittedvalues, mode='lines',
                line=dict(color='blue'), name="trendline='ols'")

# Show the plot
fig.show()

In [69]:
import numpy as np
import scipy.stats as stats
import plotly.graph_objects as go

# --- Simulation settings for the model  y = beta0 + beta1 * x + error ---
n = 100       # sample size
beta0 = 2     # true intercept
beta1 = 1.5   # true slope
sigma = 1     # standard deviation of the normal error term

# No seed is set in this cell, so the draws below are not pinned by anything
# in this cell itself.

# Predictor: n draws from Uniform(loc=0, scale=10), i.e. values in [0, 10]
x = stats.uniform.rvs(loc=0, scale=10, size=n)

# Noise: n draws from Normal(mean 0, standard deviation sigma)
errors = stats.norm.rvs(loc=0, scale=sigma, size=n)

# Outcome: the theoretical line evaluated at x, plus the noise
y = beta0 + beta1 * x + errors

# Combine x and y into a pandas DataFrame
df = pd.DataFrame({'x': x, 'y': y})

# Specify the regression of y on x (formula "y ~ x"; columns are looked up
# in df), then estimate it by ordinary least squares.
model_data_specification = smf.ols("y ~ x", data=df)  # model specification only; nothing is estimated yet
fitted_model = model_data_specification.fit()  # fitted results object

# Ways to inspect the fitted model. Only a cell's last expression is
# displayed automatically, so in this position these lines are evaluated but
# show nothing; move one to the end of a cell (or wrap it in display(...))
# to see its value.
fitted_model.summary()  # Detailed summary of the regression results
fitted_model.summary().tables[1]  # The coefficient table from that summary
fitted_model.params  # Estimated intercept and slope
fitted_model.params.values  # The same estimates as a plain array
fitted_model.rsquared  # R-squared of the fitted model

# Scatter plot of the data. Colouring by a constant column is a hack to get
# a 'Data' entry in the legend; note that it adds a 'Data' column to df in place.
df['Data'] = 'Data'
fig = px.scatter(df, x='x', y='y', color='Data', title='y vs. x with Fitted Line')

# Fitted regression line: the model's fitted values at each observed x.
# mode='lines' is set explicitly because plotly's default mode depends on the
# number of points (it adds markers when there are fewer than 20), which
# would clutter the plot with a marker on every fitted value for small samples.
fig.add_scatter(x=df['x'], y=fitted_model.fittedvalues, mode='lines',
                line=dict(color='blue'), name="trendline='ols'")

# Theoretical line from Question 1, y = beta0 + beta1 * x, evaluated at the
# smallest and largest observed x (two end points are enough for a line).
x_range = np.array([df['x'].min(), df['x'].max()])
y_line = beta0 + beta1 * x_range
x_line = x_range.copy()  # same end points, under the name used by the earlier true-line plot

# Add the theoretical line (dotted and orange); its legend label spells out
# the true equation, e.g. "2 + 1.5 * x".
fig.add_scatter(x=x_range, y=y_line, mode='lines',
                name=f"{beta0} + {beta1} * x",
                line=dict(dash='dot', color='orange'))

# Show the plot with both the fitted and theoretical lines
fig.show()
