In [9]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ===============================
# 1. Load dataset
# ===============================
df = pd.read_csv("weather.csv")

# Convert date to datetime
df["date"] = pd.to_datetime(df["date"])

# Drop rows where TARGET is missing ✅
df = df.dropna(subset=["temperature_c"])

# Feature engineering from date
df["month"] = df["date"].dt.month
df["day"] = df["date"].dt.day

# Drop original date column
df = df.drop(columns=["date"])

# ===============================
# 2. Features & Target
# ===============================
X = df.drop(columns=["temperature_c"])
y = df["temperature_c"]

# ===============================
# 3. Train-test split
# ===============================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ===============================
# 4. Numeric pipeline
# ===============================
numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

preprocessor = ColumnTransformer([
    ("num", numeric_pipeline, X.columns)
])

# ===============================
# 5. Model pipeline
# ===============================
model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression())
])

# ===============================
# 6. Train model
# ===============================
model.fit(X_train, y_train)

# ===============================
# 7. Evaluate
# ===============================
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
accuracy_percent = r2 * 100

print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R²:", r2_score(y_test, y_pred))
print("Accuracy (%):", round(accuracy_percent, 2), "%")
# ===============================
# 8. MANUAL CHECKING (Custom Input)
# ===============================
manual_input = pd.DataFrame([{
    "humidity_percent": 49.963209507789,
    "pressure_hpa": 1006.1548572926689,
    "wind_speed_kmph": 29.199932439598815,
    "cloud_cover_percent": 63.81445683721739,
    "rainfall_mm": 14.945602060220825,
    "sunshine_hours": 10.166838932357038,
    "month": 1,
    "day": 1
}])
#date,humidity_percent,pressure_hpa,wind_speed_kmph,cloud_cover_percent,rainfall_mm,sunshine_hours,month,day
#2015-01-01,49.963209507789,1006.1548572926689,29.199932439598815,63.81445683721739,14.945602060220825,10.166838932357038,1,1
#65,1013,8,30,0,7,6,12
#49.963209507789,1006.1548572926689,29.199932439598815,63.81445683721739,14.945602060220825,10.166838932357038,63.4
manual_prediction = model.predict(manual_input)

print("\n--- MANUAL PREDICTION ---")
print("Predicted Temperature (°C):", round(manual_prediction[0], 2))

# ===============================
# 9. MANUAL VERIFICATION ON REAL TEST DATA
# ===============================
sample_test = X_test.sample(5, random_state=1)
sample_actual = y_test.loc[sample_test.index]
sample_pred = model.predict(sample_test)

comparison = pd.DataFrame({
    "Actual_Temp": sample_actual.values,
    "Predicted_Temp": np.round(sample_pred, 2)
})

print("\n--- MANUAL CHECK ON REAL TEST DATA ---")
print(comparison)


MAE: 1.231440986114304
RMSE: 1.5039987299435247
R²: 0.952009049862316
Accuracy (%): 95.2 %

--- MANUAL PREDICTION ---
Predicted Temperature (°C): 62.5

--- MANUAL CHECK ON REAL TEST DATA ---
   Actual_Temp  Predicted_Temp
0        55.57           54.29
1        63.06           64.30
2        52.46           52.40
3        57.30           58.50
4        59.32           60.89


In [10]:
class CreditHistoryValidator(BaseEstimator, TransformerMixin):

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        if (0<X["Credit_History"] <3).any():
            raise ValueError("Credit_History should be between 0 to 3")
        if (X["ApplicantIncome"] < 0).any():
            raise ValueError("ApplicantIncome cannot be negative")
        return X


model_pipeline = Pipeline([
    ("credit_history_validation", CreditHistoryValidator()),
    ("preprocess", preprocessor),
    ("model", LogisticRegression(class_weight="balanced",max_iter=3000))
])

NameError: name 'BaseEstimator' is not defined