<a href="https://colab.research.google.com/github/Sak-shi-src/PRODIGY_ML_Task01/blob/main/prodigy_task01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Build a reproducible synthetic housing dataset.
# The legacy global seed is fixed so every run draws the same samples.
np.random.seed(42)

# Number of rows to generate
n_samples = 100

# Integer features. np.random.randint excludes the upper bound, so the
# actual ranges are: square footage 800..3999, bathrooms 1..4, bedrooms 1..5.
# The draw order below (footage, bathrooms, bedrooms, noise) determines the
# generated values and must not be rearranged.
square_footage = np.random.randint(low=800, high=4000, size=n_samples)
bathrooms = np.random.randint(low=1, high=5, size=n_samples)
bedrooms = np.random.randint(low=1, high=6, size=n_samples)

# Gaussian noise (mean 0, standard deviation 20000) added to the linear signal
noise = np.random.normal(loc=0, scale=20000, size=n_samples)

# Price = 100 per square foot + 5000 per bathroom + 3000 per bedroom + noise
price = (square_footage * 100) + (bathrooms * 5000) + (bedrooms * 3000) + noise

# Assemble the columns into a single DataFrame
data = pd.DataFrame(
    {
        'Square Footage': square_footage,
        'Bathrooms': bathrooms,
        'Bedrooms': bedrooms,
        'Price': price,
    }
)

# Preview the first ten rows as the cell output
data.head(10)


Unnamed: 0,Square Footage,Bathrooms,Bedrooms,Price
0,3974,4,4,448696.211647
1,1660,1,4,161703.376943
2,2094,1,4,228574.235993
3,1930,1,4,212115.272941
4,1895,3,4,234913.334314
5,3892,1,3,398665.550889
6,2438,1,2,267860.966221
7,2969,1,4,335953.749309
8,1266,3,1,107603.941257
9,2038,1,1,235172.624869


In [4]:
# Pick the predictor columns (in this order) and the regression target
feature_columns = ['Square Footage', 'Bedrooms', 'Bathrooms']
X = data[feature_columns]
y = data['Price']

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [21]:
# Create the linear regressor and fit it on the training split.
# fit() returns the estimator itself, so the chained call binds the fitted model.
model = LinearRegression().fit(X_train, y_train)
model  # last expression: display the fitted estimator as the cell output


In [11]:
# Predict prices for the held-out rows and echo the raw prediction array
y_pred = model.predict(X=X_test)
print(y_pred)


[118576.00597822 165948.1221163  410988.13282404 368110.84262641
 246656.8246254  389269.82282279 209792.60310648 374981.07795907
 138565.97734595 425234.39608451 322710.72716303 382645.1623258
 388195.55761078 357026.19753124 210422.25319405 218007.73941894
 215285.00891158 250907.98852466 320940.54452443 143172.96757418]


In [23]:
# Score the held-out predictions against the true prices
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the mean squared error

# Emit the raw values one per line, in the order MAE, MSE, RMSE, R²
print(mae, mse, rmse, r2, sep="\n")

24216.02700148828
800046087.2072932
28285.08595014859
0.9295292145339262


In [14]:
# Labelled metric report: errors to two decimals, R² to four
report_lines = [
    f"Mean Absolute Error: {mae:.2f}",
    f"Mean Squared Error: {mse:.2f}",
    f"Root Mean Squared Error: {rmse:.2f}",
    f"R² Score: {r2:.4f}",
]
print("\n".join(report_lines))

Mean Absolute Error: 24216.03
Mean Squared Error: 800046087.21
Root Mean Squared Error: 28285.09
R² Score: 0.9295


In [15]:
# Report the fitted intercept, then one coefficient per feature column.
# Names come from X.columns so each label lines up with the weight the model
# learned for that column.
print("\nModel Coefficients:")
print(f"Intercept: {model.intercept_:.2f}")
named_weights = list(zip(X.columns, model.coef_))
for feature, coef in named_weights:
    print(f"{feature}: {coef:.2f}")


Model Coefficients:
Intercept: -4879.76
Square Footage: 96.02
Bedrooms: 4530.30
Bathrooms: 7603.54
