In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the merged groundwater observations from a hard-coded Colab path.
df = pd.read_csv("/content/merged_groundwater.csv")

In [None]:
df.columns

Index(['well_no', 'source', 'datetime', 'date_parsed', 'time',
       'station_name_unified', 'district_unified', 'taluk_unified',
       'village_unified', 'latitude_unified', 'longitude_unified',
       'well_type_unified', 'well_depth_m_unified', 'water_level_bgl_m',
       'water_level_bmp_m', 'is_dry', 'remarks_clean', 'geology',
       'geomorphology', 'land_use', 'record_id'],
      dtype='object')

In [None]:
# Discard source/identifier/free-text columns that are not used downstream.
unused_columns = [
    'source', 'datetime', 'station_name_unified', 'district_unified',
    'village_unified', 'remarks_clean', 'record_id',
]
df = df.drop(columns=unused_columns)

In [None]:
# Observations without a recorded time default to midnight.
df['time'] = df['time'].fillna("00:00:00")

In [None]:
# Parse HH:MM:SS strings and keep only the hour of day.
# NOTE(review): not idempotent - after this cell `time` holds integer hours, so
# re-running it presumably fails to parse; TODO confirm.
df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S').dt.hour

In [None]:
df.columns

Index(['well_no', 'date_parsed', 'time', 'taluk_unified', 'latitude_unified',
       'longitude_unified', 'well_type_unified', 'well_depth_m_unified',
       'water_level_bgl_m', 'water_level_bmp_m', 'is_dry', 'geology',
       'geomorphology', 'land_use'],
      dtype='object')

In [None]:
df.head()

Unnamed: 0,well_no,date_parsed,time,taluk_unified,latitude_unified,longitude_unified,well_type_unified,well_depth_m_unified,water_level_bgl_m,water_level_bmp_m,is_dry,geology,geomorphology,land_use
0,53040,1975-01-01,0,Anchetty,12.363333,77.715556,Dug Well,13.2,6.56,7.31,False,CNK,BP,Rural
1,53046,1975-01-01,0,Shoolagiri,12.796667,77.966667,Dug Well,13.0,3.33,4.08,False,GGN,BP,Rural
2,53021,1975-01-01,0,Bargur,12.55,78.35,Dug Well,12.0,2.29,3.11,False,GGN,BP,Rural
3,53017,1975-01-01,0,Bargur,12.408333,78.35,Dug Well,18.200001,1.28,2.09,False,GGN,BP,Rural
4,53042,1975-04-01,0,Krishnagiri,12.5275,78.212778,Dug Well,10.6,8.05,8.9,False,GGN,BP,Rural


In [None]:
df.isna().sum()

Unnamed: 0,0
well_no,0
date_parsed,3104
time,0
taluk_unified,12
latitude_unified,0
longitude_unified,0
well_type_unified,12
well_depth_m_unified,12
water_level_bgl_m,5560
water_level_bmp_m,5561


In [None]:
import pandas as pd

# Parse the date column; anything that cannot be parsed becomes NaT
parsed_dates = pd.to_datetime(df['date_parsed'], errors='coerce')
df['date_parsed'] = parsed_dates

# Keep only rows with a valid date (≈7% are dropped per the original note)
df = df.dropna(how='any', subset=['date_parsed'])


In [None]:
def fill_with_mode(series):
    """Fill missing values in ``series`` with its most frequent value.

    When the series has no mode (for example every value is missing) it is
    returned unchanged so that a later fallback step can handle it.
    """
    candidates = series.mode()
    if candidates.empty:
        return series  # nothing to impute from; leave NaNs for the next step
    return series.fillna(candidates.iloc[0])

# Step 1: impute a well's missing taluk from that same well's most frequent taluk
taluk_by_well = df.groupby('well_no')['taluk_unified']
df['taluk_unified'] = taluk_by_well.transform(fill_with_mode)

# Step 2: whatever is still missing falls back to the dataset-wide mode
if df['taluk_unified'].isna().any():
    global_mode = df['taluk_unified'].mode()
    if len(global_mode) > 0:
        df['taluk_unified'] = df['taluk_unified'].fillna(global_mode.iloc[0])



In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the raw well identifiers with integer codes produced by LabelEncoder.
# NOTE(review): `le` is rebound by the categorical-encoding loop later in this
# notebook, so the code -> original well_no mapping is not kept around.
le = LabelEncoder()
df['well_no'] = le.fit_transform(df['well_no'])

In [None]:
def safe_mode_fill(series):
    """Return ``series`` with NaNs replaced by its first mode.

    A series without any mode (e.g. all values missing) is returned as-is.
    """
    modes = series.mode()
    return series if modes.empty else series.fillna(modes.iloc[0])

# Steps 1 and 2: impute from the mode of the same well, then of the same taluk
for group_key in ('well_no', 'taluk_unified'):
    df['well_type_unified'] = (
        df.groupby(group_key)['well_type_unified'].transform(safe_mode_fill)
    )

# Step 3: anything still missing takes the dataset-wide mode
if df['well_type_unified'].isna().any():
    global_mode = df['well_type_unified'].mode()
    if len(global_mode) > 0:
        df['well_type_unified'] = df['well_type_unified'].fillna(global_mode.iloc[0])


In [None]:
# Forward/backward fill within well_no.
# Series.ffill()/bfill() replace the deprecated fillna(method=...) spelling.
df['well_depth_m_unified'] = df.groupby('well_no')['well_depth_m_unified'].transform(
    lambda x: x.ffill().bfill()
)

# Median depth per taluk for wells that have no depth recorded at all
df['well_depth_m_unified'] = df.groupby('taluk_unified')['well_depth_m_unified'].transform(
    lambda x: x.fillna(x.median())
)


  lambda x: x.fillna(method='ffill').fillna(method='bfill')


In [None]:
# Order rows chronologically inside each well so interpolation follows time order
df = df.sort_values(['well_no', 'date_parsed'])

# Interpolate gaps in both water-level series, one well at a time;
# limit_direction='both' also fills leading and trailing gaps of a well.
for level_col in ('water_level_bgl_m', 'water_level_bmp_m'):
    df[level_col] = df.groupby('well_no')[level_col].transform(
        lambda s: s.interpolate(limit_direction='both')
    )

# Remove rows in which both water-level columns are still missing
df = df.dropna(how='all', subset=['water_level_bgl_m', 'water_level_bmp_m'])


In [None]:
def _fill_with_group_mode(x):
    """Fill NaNs with the group's first mode; leave the group untouched if it has none."""
    modes = x.mode()
    # An all-missing group has no mode. Calling fillna(None) there raises a
    # ValueError, so return the group as-is for the "Unknown" fallback below.
    return x if modes.empty else x.fillna(modes.iloc[0])


for col in ['geology', 'geomorphology', 'land_use']:
    # Fill within same well (ffill/bfill replace the deprecated fillna(method=...))
    df[col] = df.groupby('well_no')[col].transform(
        lambda x: x.ffill().bfill()
    )

    # Mode within taluk
    df[col] = df.groupby('taluk_unified')[col].transform(_fill_with_group_mode)

    # Fill any leftover with "Unknown"
    df[col] = df[col].fillna("Unknown")


  lambda x: x.fillna(method='ffill').fillna(method='bfill')
  lambda x: x.fillna(method='ffill').fillna(method='bfill')
  lambda x: x.fillna(method='ffill').fillna(method='bfill')
  lambda x: x.fillna(method='ffill').fillna(method='bfill')
  lambda x: x.fillna(method='ffill').fillna(method='bfill')
  lambda x: x.fillna(method='ffill').fillna(method='bfill')


In [None]:
# Cast the dry-well flag to an integer column.
# NOTE(review): assumes `is_dry` has no missing values - TODO confirm.
df['is_dry'] = df['is_dry'].astype(int)


In [None]:
# Calendar features derived from the parsed observation date
date_parts = df['date_parsed'].dt
df['year'] = date_parts.year
df['month'] = date_parts.month
df['quarter'] = date_parts.quarter

# Season code from the month: Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4
# (labelled 1=Winter, 2=Spring, 3=Summer, 4=Fall)
df['season'] = (df['month'] % 12) // 3 + 1


In [None]:
from sklearn.preprocessing import StandardScaler

# Continuous columns to standardise (zero mean, unit variance).
num_cols = ['latitude_unified', 'longitude_unified',
            'well_depth_m_unified', 'water_level_bgl_m', 'water_level_bmp_m']

# NOTE(review): the list includes `water_level_bgl_m`, which is used as the
# prediction target further down, so errors reported there are in standardised
# units rather than metres. The scaler is also fitted on every row, before any
# train/validation/test split is made.
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])


In [None]:
df.head()

Unnamed: 0,well_no,date_parsed,time,taluk_unified,latitude_unified,longitude_unified,well_type_unified,well_depth_m_unified,water_level_bgl_m,water_level_bmp_m,is_dry,geology,geomorphology,land_use,year,month,quarter,season
568,0,1975-03-12,0,Uthangarai,-1.441661,1.868717,Dug Well,-0.463435,-1.094996,-1.054326,0,GGN,BP,Rural,1975,3,1,2
94,0,1975-04-03,0,Uthangarai,-1.441661,1.868717,Dug Well,-0.463435,-0.540061,-0.499318,0,GGN,BP,Rural,1975,4,2,2
152,0,1975-04-04,0,Uthangarai,-1.441661,1.868717,Dug Well,-0.463435,-0.662454,-0.621727,1,GGN,BP,Rural,1975,4,2,2
10,0,1975-05-01,0,Uthangarai,-1.441661,1.868717,Dug Well,-0.463435,-0.784848,-0.744137,0,GGN,BP,Rural,1975,5,2,2
49,0,1975-05-02,0,Uthangarai,-1.441661,1.868717,Dug Well,-0.463435,-0.640026,-0.599296,0,GGN,BP,Rural,1975,5,2,2


In [None]:
# If you want the index also saved:
# (index=True writes the DataFrame index as an extra, unnamed first column)
df.to_csv("cleaned_groundwater_with_index.csv", index=True)


In [None]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd


# --- Encoding categorical variables ---

# Label encoding for taluk, geology, geomorphology
# (a fresh encoder per column; only the last one stays bound to `le`)
for col in ["taluk_unified", "geology", "geomorphology"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Binary encoding for well_type
# NOTE(review): .map() yields NaN for any label other than these two - confirm
# the column only ever contains "Dug Well" / "Bore Well".
df["well_type_unified"] = df["well_type_unified"].map({"Dug Well": 0, "Bore Well": 1})

# One-hot encoding for land_use
df = pd.get_dummies(df, columns=["land_use"], prefix="land_use")

# --- Date feature engineering ---
df["day_of_month"] = df["date_parsed"].dt.day



In [None]:
df.head()

Unnamed: 0,well_no,date_parsed,time,taluk_unified,latitude_unified,longitude_unified,well_type_unified,well_depth_m_unified,water_level_bgl_m,water_level_bmp_m,...,geomorphology,year,month,quarter,season,land_use_Agricultural,land_use_Domestic,land_use_Rural,land_use_Urban,day_of_month
568,0,1975-03-12,0,7,-1.441661,1.868717,0,-0.463435,-1.094996,-1.054326,...,0,1975,3,1,2,False,False,True,False,12
94,0,1975-04-03,0,7,-1.441661,1.868717,0,-0.463435,-0.540061,-0.499318,...,0,1975,4,2,2,False,False,True,False,3
152,0,1975-04-04,0,7,-1.441661,1.868717,0,-0.463435,-0.662454,-0.621727,...,0,1975,4,2,2,False,False,True,False,4
10,0,1975-05-01,0,7,-1.441661,1.868717,0,-0.463435,-0.784848,-0.744137,...,0,1975,5,2,2,False,False,True,False,1
49,0,1975-05-02,0,7,-1.441661,1.868717,0,-0.463435,-0.640026,-0.599296,...,0,1975,5,2,2,False,False,True,False,2


In [None]:
df.shape

(42840, 22)

In [None]:
# Persist the encoded frame. index=True writes the index as an unnamed first
# column, which shows up as "Unnamed: 0" when the file is read back.
df.to_csv("processed_groundwater.csv", index=True)


In [None]:
df.dtypes

Unnamed: 0,0
well_no,int64
date_parsed,datetime64[ns]
time,int32
taluk_unified,int64
latitude_unified,float64
longitude_unified,float64
well_type_unified,int64
well_depth_m_unified,float64
water_level_bgl_m,float64
water_level_bmp_m,float64


In [None]:
# Full pipeline (summary)
# Requirements: pandas, numpy, scikit-learn, matplotlib, lightgbm, joblib
# Load the dataset (update path as needed)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from lightgbm import LGBMRegressor
import joblib
from pathlib import Path

file_path = "processed_groundwater.csv"
# Read the processed file and discard the saved-index column when it is present
df = pd.read_csv(file_path, low_memory=False).drop(columns=["Unnamed: 0"], errors="ignore")

In [None]:
# Dates come back from CSV as strings: re-parse them (unparseable -> NaT), then
# order rows by well, date and hour and renumber the index from zero.
df['date_parsed'] = pd.to_datetime(df['date_parsed'], errors='coerce')
df = df.sort_values(['well_no','date_parsed','time']).reset_index(drop=True)


In [None]:

target_col = 'water_level_bgl_m'

# Basic time features, all read from the parsed observation date
obs_date = df['date_parsed'].dt
df['year'] = obs_date.year
df['month'] = obs_date.month
df['day'] = obs_date.day
df['dayofyear'] = obs_date.dayofyear
df['weekofyear'] = obs_date.isocalendar().week.astype(int)


In [None]:
# Convert is_dry to numeric
# First line: translate the strings 'True'/'False' to 1/0 and keep the original
# value wherever the mapping does not apply.
# Second line: coerce to numbers, treat anything non-numeric as 0, cast to int.
df['is_dry'] = df['is_dry'].map({'True':1,'False':0}).fillna(df['is_dry'])
df['is_dry'] = pd.to_numeric(df['is_dry'], errors='coerce').fillna(0).astype(int)

In [None]:
# Numeric conversions: force the static well attributes to numbers,
# turning anything unparseable into NaN
for static_col in ('well_depth_m_unified', 'latitude_unified', 'longitude_unified'):
    df[static_col] = pd.to_numeric(df[static_col], errors='coerce')


In [None]:
# Lag features per well
def add_lag_features(g):
    """Add lagged and rolling-mean features of the target to one well's rows.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single well; must contain 'date_parsed' and the column named
        by the notebook-level `target_col`.

    Returns
    -------
    pandas.DataFrame
        The rows sorted by date with the extra columns 'lag_1', 'lag_7',
        'rolling_7_mean', 'rolling_30_mean' and 'obs_count'.
    """
    g = g.sort_values('date_parsed')
    # Rolling means are taken over the series shifted by one row, so each row
    # only sees earlier observations.
    previous = g[target_col].shift(1)
    g['lag_1'] = previous
    g['lag_7'] = g[target_col].shift(7)
    g['rolling_7_mean'] = previous.rolling(window=7, min_periods=1).mean()
    g['rolling_30_mean'] = previous.rolling(window=30, min_periods=1).mean()
    g['obs_count'] = np.arange(1, len(g) + 1)  # 1-based observation counter
    return g

# Build the per-well frames explicitly instead of DataFrameGroupBy.apply:
# apply() operating on the grouping column is deprecated, and newer pandas
# drops 'well_no' from the frame handed to the function.
df = pd.concat([add_lag_features(g) for _, g in df.groupby('well_no')])
df = df[~df[target_col].isna()].copy()

# The first rows of every well have no history. Fill them with the well's mean
# of that feature, then with the overall mean.
# NOTE(review): these means are computed over all rows of the well, later ones
# included, so the filled values carry a small amount of look-ahead information.
for col in ['lag_1','lag_7','rolling_7_mean','rolling_30_mean']:
    df[col] = df.groupby('well_no')[col].transform(lambda x: x.fillna(x.mean()))
    df[col] = df[col].fillna(df[col].mean())

  df = df.groupby('well_no', group_keys=False).apply(add_lag_features)


In [None]:


# Label encode categorical columns
cat_cols = ['well_type_unified', 'geology', 'geomorphology', 'taluk_unified', 'season']
for c in cat_cols:
    if c in df.columns:
        # Record missing values BEFORE the string cast: astype(str) turns NaN
        # into the literal 'nan', after which fillna('missing') has nothing to fill.
        was_missing = df[c].isna()
        df[c] = df[c].astype(str).mask(was_missing, 'missing')
        df[c + '_enc'] = LabelEncoder().fit_transform(df[c])

# Land-use columns (kept as is)
land_cols = [c for c in df.columns if c.startswith('land_use_')]


In [None]:
# Fill remaining numeric missing values with each column's median
numeric_cols = [
    'well_depth_m_unified', 'latitude_unified', 'longitude_unified',
    'lag_1', 'lag_7', 'rolling_7_mean', 'rolling_30_mean', 'obs_count',
]
for c in (name for name in numeric_cols if name in df.columns):
    as_number = pd.to_numeric(df[c], errors='coerce')
    df[c] = as_number.fillna(as_number.median())

In [None]:
# Time-aware per-well split (train 70%, val 10%, test 20%)
def assign_splits(g, train_frac=0.7, val_frac=0.1):
    """Label one well's rows as 'train', 'val' or 'test' in chronological order.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single well; must contain 'date_parsed'.
    train_frac : float
        Fraction of the earliest rows assigned to 'train'.
    val_frac : float
        Fraction of rows after the training rows assigned to 'val'.

    Returns
    -------
    pandas.DataFrame
        A date-sorted copy of ``g`` with an added 'split' column. The input
        frame is not modified.
    """
    n = len(g)
    g = g.sort_values('date_parsed').copy()
    # Binary floating point makes 0.7 + 0.1 slightly less than 0.8, so
    # int(10 * (0.7 + 0.1)) truncates to 7 and the validation slice comes out
    # empty. A tiny tolerance before truncating restores the intended cut.
    eps = 1e-9
    train_end = int(n * train_frac + eps)
    val_end = int(n * (train_frac + val_frac) + eps)
    split_pos = g.columns.get_loc('split') if 'split' in g.columns else None
    g['split'] = 'train'
    if split_pos is None:
        split_pos = g.columns.get_loc('split')
    g.iloc[train_end:val_end, split_pos] = 'val'
    g.iloc[val_end:, split_pos] = 'test'
    return g

# Split every well separately and stitch the pieces back together.
# pd.concat over the groups replaces DataFrameGroupBy.apply, whose handling of
# the grouping column is deprecated and would drop 'well_no' in newer pandas.
df = pd.concat([assign_splits(g) for _, g in df.groupby('well_no')])

  df = df.groupby('well_no', group_keys=False).apply(assign_splits)


In [None]:
# Candidate model inputs: static well attributes and lag features, encoded
# categoricals, one-hot land-use flags and calendar features.
static_and_lag_features = [
    'well_depth_m_unified', 'latitude_unified', 'longitude_unified',
    'lag_1', 'lag_7', 'rolling_7_mean', 'rolling_30_mean', 'obs_count',
]
encoded_features = [f'{c}_enc' for c in cat_cols if c in df.columns]
calendar_features = ['month', 'dayofyear', 'weekofyear', 'year']

# Keep only the candidates that actually exist in the frame
feature_cols = [
    c
    for c in static_and_lag_features + encoded_features + land_cols + calendar_features
    if c in df.columns
]

# One frame per split label
train_df = df[df['split'] == 'train']
val_df = df[df['split'] == 'val']
test_df = df[df['split'] == 'test']

# Feature matrix and target vector for every split
X_train = train_df[feature_cols]
y_train = train_df[target_col]
X_val = val_df[feature_cols]
y_val = val_df[target_col]
X_test = test_df[feature_cols]
y_test = test_df[target_col]

In [None]:
# Train LightGBM (if your local LightGBM supports early stopping, use it; otherwise train with fixed n_estimators)
# As written, all 500 trees are fitted on the training split only; the
# validation split is not passed to fit(), so no early stopping takes place.
model = LGBMRegressor(learning_rate=0.05, n_estimators=500, num_leaves=31, random_state=42)
model.fit(X_train, y_train)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005912 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2007
[LightGBM] [Info] Number of data points in the train set: 29903, number of used features: 21
[LightGBM] [Info] Start training from score 0.068892


In [None]:
def evaluate_model(m, X, y, label="set"):
    """Predict with ``m`` on ``X``, print MAE / RMSE / R2 against ``y`` and return them.

    Returns the tuple ``(preds, mae, rmse, r2)``.
    """
    preds = m.predict(X)
    scores = (
        mean_absolute_error(y, preds),
        np.sqrt(mean_squared_error(y, preds)),  # RMSE is the square root of MSE
        r2_score(y, preds),
    )
    mae, rmse, r2 = scores
    print(f"{label} -> MAE: {mae:.3f}, RMSE: {rmse:.3f}, R2: {r2:.3f}")
    return preds, mae, rmse, r2

# Score the fitted model on the validation and test splits
val_preds, val_mae, val_rmse, val_r2 = evaluate_model(m=model, X=X_val, y=y_val, label="Val")
test_preds, test_mae, test_rmse, test_r2 = evaluate_model(m=model, X=X_test, y=y_test, label="Test")

Val -> MAE: 0.175, RMSE: 0.332, R2: 0.865
Test -> MAE: 0.182, RMSE: 0.312, R2: 0.796


In [None]:
X_train.head()

Unnamed: 0,well_depth_m_unified,latitude_unified,longitude_unified,lag_1,lag_7,rolling_7_mean,rolling_30_mean,obs_count,well_type_unified_enc,geology_enc,...,taluk_unified_enc,season_enc,land_use_Agricultural,land_use_Domestic,land_use_Rural,land_use_Urban,month,dayofyear,weekofyear,year
0,-0.463435,-1.441661,1.868717,-0.784537,-0.78688,-0.787292,-0.790547,1,0,1,...,7,1,False,False,True,False,3,71,11,1975
1,-0.463435,-1.441661,1.868717,-1.094996,-0.78688,-1.094996,-1.094996,2,0,1,...,7,1,False,False,True,False,4,93,14,1975
2,-0.463435,-1.441661,1.868717,-0.540061,-0.78688,-0.817529,-0.817529,3,0,1,...,7,1,False,False,True,False,4,94,14,1975
3,-0.463435,-1.441661,1.868717,-0.662454,-0.78688,-0.765837,-0.765837,4,0,1,...,7,1,False,False,True,False,5,121,18,1975
4,-0.463435,-1.441661,1.868717,-0.784848,-0.78688,-0.77059,-0.77059,5,0,1,...,7,1,False,False,True,False,5,122,18,1975


In [None]:
X_test.head()

Unnamed: 0,well_depth_m_unified,latitude_unified,longitude_unified,lag_1,lag_7,rolling_7_mean,rolling_30_mean,obs_count,well_type_unified_enc,geology_enc,...,taluk_unified_enc,season_enc,land_use_Agricultural,land_use_Domestic,land_use_Rural,land_use_Urban,month,dayofyear,weekofyear,year
131,-0.463435,-1.441661,1.868717,-0.623365,-0.811761,-0.68827,-0.759878,132,0,1,...,7,0,False,False,True,False,1,4,1,1987
132,-0.463435,-1.441661,1.868717,-0.652202,-0.828422,-0.665475,-0.757849,133,0,1,...,7,0,False,False,True,False,1,12,3,1987
133,-0.463435,-1.441661,1.868717,-0.681038,-0.740846,-0.64442,-0.752808,134,0,1,...,7,0,False,False,True,False,2,32,5,1987
134,-0.463435,-1.441661,1.868717,-0.693854,-0.65327,-0.637707,-0.759429,135,0,1,...,7,0,False,False,True,False,2,33,6,1987
135,-0.463435,-1.441661,1.868717,-0.55544,-0.565693,-0.623732,-0.751611,136,0,1,...,7,0,False,False,True,False,2,38,6,1987


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Separate copy of the processed data for the LSTM experiment (hard-coded Colab path).
df1 = pd.read_csv("/content/processed_groundwater.csv")

In [None]:
# Required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [None]:
df1.head()

Unnamed: 0.1,Unnamed: 0,well_no,date_parsed,time,taluk_unified,latitude_unified,longitude_unified,well_type_unified,well_depth_m_unified,water_level_bgl_m,...,geomorphology,year,month,quarter,season,land_use_Agricultural,land_use_Domestic,land_use_Rural,land_use_Urban,day_of_month
0,568,0,1975-03-12,0,7,-1.441661,1.868717,0,-0.463435,-1.094996,...,0,1975,3,1,2,False,False,True,False,12
1,94,0,1975-04-03,0,7,-1.441661,1.868717,0,-0.463435,-0.540061,...,0,1975,4,2,2,False,False,True,False,3
2,152,0,1975-04-04,0,7,-1.441661,1.868717,0,-0.463435,-0.662454,...,0,1975,4,2,2,False,False,True,False,4
3,10,0,1975-05-01,0,7,-1.441661,1.868717,0,-0.463435,-0.784848,...,0,1975,5,2,2,False,False,True,False,1
4,49,0,1975-05-02,0,7,-1.441661,1.868717,0,-0.463435,-0.640026,...,0,1975,5,2,2,False,False,True,False,2


In [None]:
# 2. Sort by well_no and date
# `date_parsed` is not re-parsed after read_csv here, so it is presumably still
# a string column; ISO-formatted date strings sort in chronological order.
df1 = df1.sort_values(by=['well_no', 'date_parsed']).reset_index(drop=True)

In [None]:
categorical_cols = ['well_type_unified', 'geomorphology', 'season']
for col in categorical_cols:
    if col in df1.columns:
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))


In [None]:
df1['water_level_bgl_m'] = df1.groupby('well_no')['water_level_bgl_m'].ffill().bfill()

In [None]:
feature_cols = [
    'well_type_unified', 'well_depth_m_unified', 'geomorphology', 'latitude_unified',
    'longitude_unified', 'year', 'month', 'quarter', 'season',
    'land_use_Agricultural', 'land_use_Domestic', 'land_use_Rural', 'land_use_Urban', 'day_of_month'
]

In [None]:
target_col = 'water_level_bgl_m'


In [None]:
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[feature_cols])
y_scaled = scaler_y.fit_transform(df[[target_col]])

In [None]:
def create_sequences_per_well(df, X_scaled, y_scaled, time_steps=12):
    X_seq, y_seq = [], []
    start_idx = 0
    for well_no, group in df.groupby('well_no'):
        n = len(group)
        for i in range(n - time_steps):
            X_seq.append(X_scaled[start_idx + i : start_idx + i + time_steps])
            y_seq.append(y_scaled[start_idx + i + time_steps])
        start_idx += n
    return np.array(X_seq), np.array(y_seq)

TIME_STEPS = 12
X_seq, y_seq = create_sequences_per_well(df, X_scaled, y_scaled, TIME_STEPS)

In [None]:
# Hold out the last 20% of the sequences for testing.
# NOTE(review): sequences are produced well by well, so this tail presumably
# consists of the last wells rather than the most recent dates - confirm that
# this is the intended evaluation.
split_idx = int(0.8 * len(X_seq))
X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]

In [None]:
# Convert the NumPy arrays to float32 tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

# Pair inputs with targets
train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

# Mini-batches of 32; shuffle=False keeps the original order in both loaders,
# so training batches are always visited in the same sequence.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
class GroundwaterLSTM(nn.Module):
    """LSTM regressor that maps a window of feature rows to one value.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim=64, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return predictions of shape (batch, 1) for ``x`` of shape (batch, time, input_dim)."""
        # Create the zero initial states on the same device and dtype as the
        # input; plain torch.zeros(...) is always CPU/float32 and breaks once
        # the model is moved to a GPU or cast to another dtype.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_dim)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_dim)
        out, _ = self.lstm(x, (h0, c0))
        out = self.dropout(out[:, -1, :])  # take last time step
        out = self.fc(out)
        return out

In [None]:
# Number of features per time step is the last axis of the sequence array
input_dim = X_train.shape[2]
# NOTE(review): `model` rebinds the name used for the LightGBM regressor above.
# No torch seed is set in the visible cells, so weight initialisation
# presumably differs between runs - TODO confirm.
model = GroundwaterLSTM(input_dim)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
EPOCHS = 100
for epoch in range(EPOCHS):
    model.train()
    batch_losses = []
    for X_batch, y_batch in train_loader:
        # Standard step: reset gradients, forward, loss, backward, update
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    # Mean of the per-batch losses for this epoch
    train_loss = sum(batch_losses) / len(train_loader)
    is_report_epoch = (epoch + 1) % 10 == 0
    if is_report_epoch:
        print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.6f}')


Epoch 10/100, Loss: 0.001647
Epoch 20/100, Loss: 0.001600
Epoch 30/100, Loss: 0.001579
Epoch 40/100, Loss: 0.001540
Epoch 50/100, Loss: 0.001488
Epoch 60/100, Loss: 0.001441
Epoch 70/100, Loss: 0.001410
Epoch 80/100, Loss: 0.001395
Epoch 90/100, Loss: 0.001364
Epoch 100/100, Loss: 0.001324


In [None]:
# Switch to inference mode (disables dropout) and predict the whole test set
# in one forward pass without tracking gradients.
model.eval()
with torch.no_grad():
    y_pred_t = model(X_test_t).numpy()

# Undo the MinMax scaling of the target for both predictions and ground truth
y_pred = scaler_y.inverse_transform(y_pred_t)
y_true = scaler_y.inverse_transform(y_test)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Report test-set errors. `y_true` and `y_pred` are created by the prediction
# cell directly above; running this cell without it raises NameError.
print('MAE:', mean_absolute_error(y_true, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_true, y_pred)))

NameError: name 'y_true' is not defined