In [1]:
import pandas as pd
import os

# Read the already-cleaned dataset from the working directory.
df_cleaned = pd.read_csv("cleaned_dataset.csv")

# Sanity check: confirm the load, then report the frame's dimensions and a
# short preview so a wrong or truncated file is noticed immediately.
print("\n✅ Cleaned Dataset Loaded Successfully!")
print("📌 Dataset Shape:", df_cleaned.shape)
print("📝 First 5 Rows:", df_cleaned.head(), sep="\n")



✅ Cleaned Dataset Loaded Successfully!
📌 Dataset Shape: (4524526, 42)
📝 First 5 Rows:
         pH      Iron   Nitrate    Chloride           Lead      Zinc  \
0  8.332988  0.000083  8.605777  122.799772   3.713298e-52  3.434827   
1  6.917863  0.000081  3.734167  227.029851   7.849262e-94  1.245317   
2  5.443762  0.020106  3.816994  230.995630   5.286616e-76  0.528280   
3  7.955339  0.143988  8.224944  178.129940  3.997118e-176  4.027879   
4  8.091909  0.002167  9.925788  186.540872  4.171069e-132  3.807511   

   Turbidity  Fluoride    Copper      Odor  ...  Month_December  \
0   0.022683  0.607283  0.144599  1.626212  ...           False   
1   0.019007  0.622874  0.437835  1.686049  ...           False   
2   0.319956  0.423423  0.431588  3.414619  ...           False   
3   0.166319  0.208454  0.239451  1.769302  ...           False   
4   0.004867  0.222912  0.616574  0.795310  ...           False   

   Month_February  Month_January  Month_July  Month_June  Month_March  \
0   

In [2]:
from sklearn.preprocessing import MinMaxScaler

# Columns to rescale: every float64/int64 column except the label.
# 'Target' is filtered out rather than removed with list.remove(), so the
# cell does not raise ValueError when 'Target' is absent from the numeric
# selection (e.g. stored as bool/object). Columns of other dtypes (such as
# the boolean Month_* indicators shown in the preview) are not selected and
# therefore stay unscaled.
num_cols = [
    col
    for col in df_cleaned.select_dtypes(include=['float64', 'int64']).columns
    if col != "Target"  # Exclude target variable
]

# Apply Min-Max Scaling: each selected column is mapped onto [0, 1] using its
# own min and max. The result overwrites the columns of df_cleaned in place.
# NOTE(review): the scaler is fitted on the full dataset; if a train/test
# split happens later, fit on the training portion only — TODO confirm.
scaler = MinMaxScaler()
df_cleaned[num_cols] = scaler.fit_transform(df_cleaned[num_cols])

# ✅ Verification: Print Min & Max values to confirm scaling
print("\n📊 **Verification - Normalized Dataset Summary:**")
print(df_cleaned[num_cols].describe().loc[['min', 'max']])  # Should be between 0 and 1

# Save the normalized dataset (all columns, scaled and unscaled, without the index)
df_cleaned.to_csv("normalized_dataset.csv", index=False)
print("\n✅ Entire Dataset Normalized & Saved as 'normalized_dataset.csv'")



📊 **Verification - Normalized Dataset Summary:**
      pH  Iron  Nitrate  Chloride  Lead  Zinc  Turbidity  Fluoride  Copper  \
min  0.0   0.0      0.0       0.0   0.0   0.0        0.0       0.0     0.0   
max  1.0   1.0      1.0       1.0   1.0   1.0        1.0       1.0     1.0   

     Odor  Sulfate  Conductivity  Chlorine  Manganese  Total Dissolved Solids  \
min   0.0      0.0           0.0       0.0        0.0                     0.0   
max   1.0      1.0           1.0       1.0        1.0                     1.0   

     Water Temperature  Air Temperature  Day  Time of Day  
min                0.0              0.0  0.0          0.0  
max                1.0              1.0  1.0          1.0  

✅ Entire Dataset Normalized & Saved as 'normalized_dataset.csv'


In [3]:
import pandas as pd
import numpy as np
import os
import cv2
from tqdm import tqdm

# Load the normalized dataset
df_normalized = pd.read_csv("normalized_dataset.csv")

# Define the main folder for storing images on K drive
main_folder = r"K:\DWTM_Images"
os.makedirs(main_folder, exist_ok=True)

# Image Settings
img_size = (28, 28)  # Standard size for images
pixel_intensity_scale = 255  # Scale normalized values to pixel intensities

# Create one subfolder per feature (column) and remember each folder path so
# it is not rebuilt for every single image.
feature_folders = []
for feature in tqdm(df_normalized.columns, desc="Creating Folders"):
    feature_folder = os.path.join(main_folder, feature)
    os.makedirs(feature_folder, exist_ok=True)
    feature_folders.append(feature_folder)

# Pre-compute every pixel intensity as uint8, one column at a time so only a
# single float64 column is held temporarily. Doing this up front means bad
# data is reported before the long image-writing loop starts.
intensities = np.empty(df_normalized.shape, dtype=np.uint8)
for col_pos, feature in enumerate(df_normalized.columns):
    scaled = df_normalized.iloc[:, col_pos].to_numpy(dtype=np.float64) * pixel_intensity_scale
    if np.isnan(scaled).any():
        # int(nan) raised ValueError inside the loop before; fail early instead.
        raise ValueError(
            f"Column '{feature}' contains missing values; cannot map them to pixel intensities"
        )
    # Clip so values outside [0, 1] cannot overflow uint8; astype truncates
    # toward zero, matching int() for in-range values.
    intensities[:, col_pos] = np.clip(scaled, 0, 255).astype(np.uint8)

# Convert each row into an image representation: one uniformly filled
# grayscale image per (row, feature), saved as <main_folder>/<feature>/<index>.png.
img = np.empty(img_size, dtype=np.uint8)  # Single buffer reused for every image
for row_pos, idx in enumerate(
    tqdm(df_normalized.index, total=df_normalized.shape[0], desc="Generating Images")
):
    for col_pos, feature_folder in enumerate(feature_folders):
        # Fill the image with the intensity value
        img.fill(intensities[row_pos, col_pos])

        # Save the image in the corresponding feature folder on K drive
        img_path = os.path.join(feature_folder, f"{idx}.png")
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(img_path, img):
            raise OSError(f"Failed to write image: {img_path}")

# ✅ Image Conversion Completed!
# Interpolate the folder instead of hand-writing backslashes inside the
# string literal (avoids the invalid '\D' escape and the accidental "\'").
print(f"\n✅ All Rows Converted to Images & Saved in '{main_folder}'")


Creating Folders: 100%|██████████| 42/42 [00:00<00:00, 7001.34it/s]
Generating Images: 100%|██████████| 4524526/4524526 [13:57:19<00:00, 90.06it/s]    


✅ All Rows Converted to Images & Saved in 'K:\DWTM_Images'



