In [12]:
import pandas as pd
import joblib

# Recover human-readable TOEFL scores from the scaled, ML-ready dataset by
# undoing the feature scaling with the scaler that was persisted alongside it.

# Load the processed (already scaled) dataset and the fitted scaler
df = pd.read_csv('../data/processed/ml_ready_data1_old.csv')
scaler = joblib.load('../models/feature_scaler1.joblib')

# Columns we expect the scaler to have been fitted on
scaled_columns = ['ieltsMarks', 'minimumGPA', 'tuitionFeeUSD', 'toefl_ibt', 'universityRankingNum']

# Column names, in fit order, as recorded by the scaler
scaler_columns = scaler.feature_names_in_

# Fail early if the scaler was fitted on a different set of features
if set(scaled_columns) != set(scaler_columns):
    raise ValueError(f"Mismatch between the columns used for scaling: {scaled_columns} and the columns in the scaler: {scaler_columns}")

# The set comparison above ignores ordering, but inverse_transform applies the
# fitted per-feature parameters by column position. Always hand the columns
# over in the scaler's own fit order so each feature is un-scaled with its own
# parameters rather than a neighbour's.
fit_order = list(scaler_columns)

# Isolate only the scaled columns, arranged in fit order
scaled_data = df[fit_order]

# Inverse transform the scaled features
scaled_data_inverse = scaler.inverse_transform(scaled_data)

# Convert the result back to a DataFrame. Re-using df's index keeps the
# assignment below row-aligned even if df does not carry a default RangeIndex.
scaled_data_inverse_df = pd.DataFrame(scaled_data_inverse, columns=fit_order, index=df.index)

# Now we add the human-readable 'toefl_ibt' column
# NOTE(review): a previously recorded run printed values of roughly 422-540
# here, which is outside the 0-120 TOEFL iBT range; re-check the output after
# this ordering fix and confirm the scaler artifact matches this dataset.
df['human_readable_toefl_ibt'] = scaled_data_inverse_df['toefl_ibt']

# Display the results: scaled 'toefl_ibt' next to the human-readable value
print("\nHuman-readable TOEFL scores:")
print(df[['toefl_ibt', 'human_readable_toefl_ibt']])

# Optionally, save the updated dataframe if you want to export the results
df.to_csv('../data/processed/ml_ready_data1_human_readable.csv', index=False)



Human-readable TOEFL scores:
     toefl_ibt  human_readable_toefl_ibt
0     0.265713                422.078561
1     0.265713                422.078561
2     0.265713                422.078561
3     0.265713                422.078561
4     0.265713                422.078561
..         ...                       ...
126   0.892378                540.455332
127   0.892378                540.455332
128   0.892378                540.455332
129   0.265713                422.078561
130   0.265713                422.078561

[131 rows x 2 columns]
