# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# --- Understanding Features and Instances ---
# Build a small synthetic dataset: each row is an instance, each column a
# feature (or the target). The seed is fixed so every run draws the same values.
np.random.seed(42)

# The draws happen in this order (Age, Salary, House Price); keeping the order
# keeps the generated numbers reproducible. randint's upper bound is exclusive.
ages = np.random.randint(20, 60, 10)
salaries = np.random.randint(30000, 100000, 10)
house_prices = np.random.randint(150000, 500000, 10)

data = {'Age': ages, 'Salary': salaries, 'House Price': house_prices}
df = pd.DataFrame(data)

# Show the full table: header line first, then the DataFrame itself
print("Dataset (Features and Instances):", df, sep="\n")

# Input features (X) are the two predictor columns; the target (y) is the
# column the model will learn to predict.
feature_columns = ['Age', 'Salary']
X = df[feature_columns]
y = df['House Price']

# --- Review of Python DataFrames ---
print("\nDataFrame Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()
print("\nDataFrame Summary Statistics:")
print(df.describe())

# Visualizing the features
# Age (tens) and Salary (tens of thousands) differ by orders of magnitude.
# On a single shared x-axis the Age points collapse into a sliver near zero,
# so each feature gets its own panel; the y-axis (House Price) is shared.
fig, (ax_age, ax_salary) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))

ax_age.scatter(df['Age'], df['House Price'], label='Age vs House Price')
ax_age.set_xlabel("Age")
ax_age.set_ylabel("House Price")
ax_age.legend()

# Explicit orange keeps the colour the second series had in the combined plot.
ax_salary.scatter(df['Salary'], df['House Price'], color='tab:orange',
                  label='Salary vs House Price')
ax_salary.set_xlabel("Salary")
ax_salary.legend()

fig.suptitle("Feature Relationships with House Price")
fig.tight_layout()
plt.show()

# --- Introducing Scikit-Learn ---
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Input (X) and output (y) features were already defined above.

# Create the Linear Regression estimator and train it on the training split.
# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true held-out targets
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(
    "\nModel Performance:",
    f"Mean Squared Error: {mse:.4f}",
    f"R-squared Score: {r2:.4f}",
    sep="\n",
)

# Actual vs predicted: points near the diagonal indicate accurate predictions
plt.scatter(y_test, y_pred)
plt.title("Actual vs Predicted House Prices")
plt.xlabel("Actual House Prices")
plt.ylabel("Predicted House Prices")
plt.show()


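# Output recorded when this cell was run:
#   ModuleNotFoundError: No module named 'numpy'
# (numpy was not installed in that environment; e.g. `pip install numpy pandas matplotlib scikit-learn`)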