In [3]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Toy dataset: ten students, two numeric predictors and a binary outcome.
# Column order matters downstream, so the keys are listed in the same order
# in which the DataFrame columns should appear.
data = dict(
    StudyHours=list(range(1, 11)),
    PrevExamScore=[30, 40, 45, 50, 60, 65, 70, 75, 80, 85],
    Pass=[0] * 5 + [1] * 5,
)

df = pd.DataFrame(data)

# Predictor matrix (both feature columns) and the target column.
X_original = df.loc[:, ['StudyHours', 'PrevExamScore']]
y = df.loc[:, 'Pass']

# --- OLS with Backward Elimination ---
# Fit ordinary least squares on all predictors plus an intercept column
# ('const', added by sm.add_constant).
X_ols = sm.add_constant(X_original)
model = sm.OLS(y, X_ols).fit()
print("\nOLS Full Model:")
print(model.summary())

# Backward elimination
# Repeatedly drop the single least significant predictor (largest p-value)
# and refit, until every remaining predictor is significant at the chosen
# level or no predictors are left. The intercept is never a candidate for
# removal. Checking every predictor (rather than one hard-coded column) and
# re-checking after each refit matters because p-values change once a
# correlated predictor has been removed.
SIGNIFICANCE_LEVEL = 0.05
while True:
    # Ignore the intercept and any undefined (NaN) p-values.
    predictor_pvalues = model.pvalues.drop('const', errors='ignore').dropna()
    if predictor_pvalues.empty:
        break

    worst_feature = predictor_pvalues.idxmax()
    if predictor_pvalues[worst_feature] <= SIGNIFICANCE_LEVEL:
        break

    X_ols = X_ols.drop(columns=worst_feature)
    model = sm.OLS(y, X_ols).fit()

print("\nOLS Final Model after Backward Elimination:")
print(model.summary())

# --- Forward Selection ---
def forward_selection(X, y):
    """Greedily choose columns of ``X`` that improve hold-out R-squared.

    Each round, every not-yet-selected column is tried in turn: a
    ``LinearRegression`` is fitted on the already-selected columns plus the
    candidate, using an 80/20 ``train_test_split`` with ``random_state=42``,
    and scored with ``r2_score`` on the held-out rows. The candidate with the
    highest score is kept only if it strictly beats the best score so far
    (which starts at 0.0); otherwise the search stops.

    Args:
        X: Table of candidate predictors, indexable by a list of column
            names and exposing ``.columns``.
        y: Target values aligned with the rows of ``X``.

    Returns:
        The selected column names, in the order they were added. Empty when
        ``X`` has no columns or no candidate scores above 0.0.
    """
    candidates = set(X.columns)
    chosen = []
    best_so_far = 0.0

    while candidates:
        trial_results = []

        for name in candidates:
            trial_columns = [*chosen, name]
            X_fit, X_holdout, y_fit, y_holdout = train_test_split(
                X[trial_columns], y, test_size=0.2, random_state=42
            )
            regressor = LinearRegression().fit(X_fit, y_fit)
            holdout_r2 = r2_score(y_holdout, regressor.predict(X_holdout))
            trial_results.append((holdout_r2, name))

        # Descending sort on (score, name) tuples: highest score first, with
        # ties resolved by the later-sorting column name.
        top_score, top_feature = sorted(trial_results, reverse=True)[0]

        # Stop as soon as the best candidate fails to strictly improve.
        if not best_so_far < top_score:
            break

        candidates.remove(top_feature)
        chosen.append(top_feature)
        best_so_far = top_score

    return chosen

# Run the greedy forward search over both predictors and report what it kept.
best_features = forward_selection(X_original, y)
print(f"\nSelected features using Forward Selection: {best_features}")

# --- LASSO Regression ---
# Hold out 20% of the rows (fixed seed) so the L1-penalised fit is scored on
# data it was not trained on.
X_train, X_test, y_train, y_test = train_test_split(
    X_original,
    y,
    test_size=0.2,
    random_state=42,
)
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred = lasso_model.predict(X_test)
r2 = r2_score(y_test, y_pred)

print(f"\nLASSO R-squared score: {r2:.3f}")
print("LASSO Coefficients:")
# Label each fitted coefficient with the column it belongs to.
coefficient_table = pd.Series(data=lasso_model.coef_, index=X_original.columns)
print(coefficient_table)


ModuleNotFoundError: No module named 'statsmodels'