In [4]:
import pandas as pd
import numpy as np

# Seed NumPy's global (legacy) generator so the synthetic sample is identical
# on every run.
np.random.seed(0)

# Size of the synthetic sample.
num_rows = 500

# Simulate a simple linear relationship, y = 2x + noise.
# X is drawn before the noise: with a fixed seed the draw order determines
# the generated values, so it must not be swapped.
X = 100 * np.random.random_sample(num_rows)       # independent variable, scaled to [0, 100)
noise = 10 * np.random.standard_normal(num_rows)  # standard-normal noise scaled by 10
y = noise + 2 * X                                 # dependent variable

# Collect both series into a two-column table.
data = pd.DataFrame({'X': X, 'y': y})

# Write the table to disk without the row index so the file contains only
# the 'X' and 'y' columns.
csv_file = 'regression_data.csv'
data.to_csv(csv_file, index=False)
print(f'Data saved to {csv_file}')


Data saved to regression_data.csv


In [5]:
# Show the generated DataFrame as this cell's output (the rendered table below
# elides the middle rows).
data


Unnamed: 0,X,y
0,54.881350,99.907593
1,71.518937,128.319523
2,60.276338,137.034025
3,54.488318,110.618914
4,42.365480,90.403863
...,...,...
495,27.165277,44.684489
496,45.544415,91.687777
497,40.171354,78.217477
498,24.841347,42.061548


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset from the CSV file in the working directory.
data = pd.read_csv('regression_data.csv')

# Separate features and target variable.
# The double brackets keep X as a one-column DataFrame (2-D), the layout
# scikit-learn estimators take for features; y is a plain Series.
X = data[['X']]
y = data['y']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split (and therefore the reported metrics) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit an ordinary least-squares model on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the metrics. The header is a plain string: it has no placeholders,
# so an f-string prefix would be a no-op.
print('Linear Regression Model Performance:')
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# Report the fitted line. There is a single feature, so coef_ holds exactly
# one slope value.
print(f'Coefficient: {model.coef_[0]:.2f}')
print(f'Intercept: {model.intercept_:.2f}')


Linear Regression Model Performance:
Mean Squared Error: 101.82
R-squared: 0.97
Coefficient: 2.01
Intercept: -1.47


In [7]:
import joblib

# Save the trained model.
# The target file name lives in one variable so the path passed to
# joblib.dump and the path reported in the message cannot drift apart.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
model_file = 'linear_regression_model.pkl'
joblib.dump(model, model_file)

print(f'Model saved to {model_file}')


Model saved to linear_regression_model.pkl
