In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_squared_error, r2_score

# Read the dataset
data = pd.read_csv("./Data/PricePredictionData.csv", names=["HostelName", "No_of_Bed", "Ac", "Laundry", "FoodType", "Rating", "Security", "price"])

# Feature engineering
data['Bed_to_Price_Ratio'] = data['No_of_Bed'] / data['price']
data = pd.get_dummies(data, columns=['FoodType'])

# Prepare the features and target variable
X = data.drop(['HostelName', 'price'], axis=1)
y = data['price']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=10)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature selection
selector = SelectKBest(f_regression, k=4)  # Select top 4 features
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

# Create a Linear Regression model
model = LinearRegression()

# Train the model
model.fit(X_train_selected, y_train)
print(model.score(X_test_scaled,y_test))

# Make predictions on the test set
y_pred = model.predict(X_test_selected)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)


print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R^2 Score:", r2)


ValueError: X has 8 features, but LinearRegression is expecting 4 features as input.