# Graph Quality Classifier Using CNN

This project demonstrates an end-to-end pipeline for classifying graphs generated by forecasting systems into four categories:
- **Good**
- **Passable**
- **Bad**
- **None** (non-forecast)

It involves:
- Generating synthetic graphs for each class using `matplotlib`
- Training a convolutional neural network (MobileNetV2 backbone)
- Saving the trained model and reusing it to classify graphs rendered in memory (`BytesIO`)


## Dataset Overview

The dataset is auto-generated using the script `data.py`, which creates:
- 70 images each for `good`, `passable`, `bad`
- 50 images for `none` (random chart types)

The total dataset is stored under the folder: `graph_classifier_dataset_final`.


## Model Architecture and Training

We use **MobileNetV2** as the feature extractor (transfer learning), followed by:
- Global Average Pooling
- Dense Layer with Dropout
- Final classification layer with softmax activation for 4 classes


In [5]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam


ModuleNotFoundError: No module named 'tensorflow.keras'

In [4]:
# Root folder of the generated image dataset. flow_from_directory expects a
# directory containing one sub-folder per class; the retail inventory CSV
# belongs to the forecasting cells and cannot be used here.
dataset_path = r'graph_classifier_dataset_final'
img_size = (128, 128)
batch_size = 8

# Scale pixel values to [0, 1] and hold out 20% of every class for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Training split: shuffled, one-hot ('categorical') labels.
train_gen = datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Validation split: same settings, fixed order.
val_gen = datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Class names as discovered from the sub-folder names of dataset_path.
class_names = list(train_gen.class_indices.keys())
print("Class labels:", class_names)


NameError: name 'ImageDataGenerator' is not defined

In [None]:
# Pretrained MobileNetV2 without its classification head; its weights are
# frozen so only the layers stacked on top of it are trained.
base_model = MobileNetV2(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Classification head: pooled features -> small dense layer -> dropout ->
# softmax over the 4 output classes.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(4, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0005),
    metrics=['accuracy'],
)


In [None]:
# Train for 10 epochs, evaluating on the validation generator after each one;
# the returned History object feeds the accuracy/loss plots.
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
)


In [None]:
# One figure per metric, each showing the training curve and the validation
# curve recorded in `history`.
for metric_key, short_label, figure_title in (
    ('accuracy', 'Acc', 'Accuracy'),
    ('loss', 'Loss', 'Loss'),
):
    plt.plot(history.history[metric_key], label=f'Train {short_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Val {short_label}')
    plt.title(figure_title)
    plt.legend()
    plt.show()


In [None]:
# Write the trained classifier to the current working directory; the
# inference cell loads a file with this same name.
model.save("graph_quality_final_model.h5")
# The check mark was stored as mis-decoded UTF-8; restored to the real glyph.
print("✅ Model saved as 'graph_quality_final_model.h5'")


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# 1. Dataset path & parameters
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
dataset_path = r'C:\Users\saksh\Desktop\Github own\Trial 2\graph_classifier_dataset_final'
img_size = (128, 128)
batch_size = 8

# 2. Data generators
# Pixels are rescaled to [0, 1]; 20% of every class is held out for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_gen = datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_gen = datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Number of classes found on disk; sizing the output layer from it keeps the
# model in sync with the dataset instead of relying on a hard-coded 4.
num_classes = len(train_gen.class_indices)

# 3. Model architecture
# The input shape is derived from img_size so the generators and the network
# cannot silently disagree about the image size.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(*img_size, 3))
base_model.trainable = False  # only the head below is trained

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.0005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 4. Train
history = model.fit(train_gen, validation_data=val_gen, epochs=10)

# 5. Save model
model.save("graph_quality_final_model.h5")

# 6. Plot: one figure per metric, training curve vs. validation curve
for metric_key, short_label, figure_title in (
    ('accuracy', 'Acc', 'Accuracy'),
    ('loss', 'Loss', 'Loss'),
):
    plt.plot(history.history[metric_key], label=f'Train {short_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Val {short_label}')
    plt.title(figure_title)
    plt.legend()
    plt.show()


In [None]:
# Importing Necessary Libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime


In [None]:
# Load the retail inventory CSV into `df`.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
store = r"C:\Users\saksh\Desktop\Github own\Trial 2\retail_store_inventory.csv"
df=pd.read_csv(store)

# Show basic information about the data set
df.info()

# Display the first 5 rows to understand the data structure
df.head()

In [None]:
# Count missing values per column
print(df.isnull().sum())

# Parse 'Date' into datetimes so that min/max below compare chronologically
df['Date'] = pd.to_datetime(df['Date'])

# Check date range
print("Date Range: ", df['Date'].min(),"to",df['Date'].max())


In [None]:
# Tag every row with its calendar month (as a Period).
df['Date'] = pd.to_datetime(df['Date'])
df['YearMonth'] = df['Date'].dt.to_period('M')

# Total 'Demand Forecast' per (month, region), with the month converted from
# a Period back to a timestamp.
monthly_totals = (
    df.groupby(['YearMonth', 'Region'])['Demand Forecast']
    .sum()
    .reset_index()
)
monthly_demand = monthly_totals.assign(
    YearMonth=monthly_totals['YearMonth'].dt.to_timestamp()
)

monthly_demand.head()

In [None]:
# Show min and max dates in the grouped monthly data
print("Earliest record:", monthly_demand['YearMonth'].min())
print("Latest record:", monthly_demand['YearMonth'].max())


In [None]:
# Check which regions have data in Jan 2024: select the rows of the monthly
# table whose month equals 2024-01-01 (displayed as the cell output)
monthly_demand[monthly_demand['YearMonth'] == '2024-01-01']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Keep only months before 2024-01-01; later cells treat 2024-01-01 onwards as
# the forecast horizon.
plot_data = monthly_demand[monthly_demand['YearMonth']<'2024-01-01']

# One line per region, with a marker on every monthly total
plt.figure(figsize=(14, 6))
sns.lineplot(data=plot_data, x='YearMonth', y='Demand Forecast', hue='Region', marker='o')

# The title glyphs were stored as mis-decoded UTF-8; restored to the intended
# chart emoji and en dash.
plt.title('📊 Monthly Demand Forecast by Region (2022–2024)', fontsize=16)
plt.xlabel('Month')
plt.ylabel('Total Demand Forecast')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend(title='Region')
plt.tight_layout()
plt.show()


In [None]:
from statsmodels.tsa.api import ExponentialSmoothing

# Per-region forecast frames, concatenated after the loop.
forecast_list = []

regions = plot_data['Region'].unique()

# Forecast 6 month-start dates beginning at 2024-01-01.
forecast_periods = 6
future_dates = pd.date_range(start='2024-01-01', periods=forecast_periods, freq='MS')

for region in regions:
    regional_data = plot_data[plot_data['Region'] == region]
    ts = regional_data.set_index('YearMonth')['Demand Forecast']

    # Additive-trend, additive-seasonal exponential smoothing with a 12-month
    # season. Bound to `hw_model` rather than `model` so the global Keras
    # classifier named `model` is not overwritten by this loop.
    hw_model = ExponentialSmoothing(ts, trend='add', seasonal='add', seasonal_periods=12)
    fitted_model = hw_model.fit()
    forecast = fitted_model.forecast(forecast_periods)

    # Same columns as plot_data so the two can be concatenated directly.
    region_forecast = pd.DataFrame({
        'YearMonth': future_dates,
        'Region': region,
        'Demand Forecast': forecast.values
    })

    forecast_list.append(region_forecast)


# All regional forecasts in one frame.
forecast_df = pd.concat(forecast_list, ignore_index=True)

# History followed by forecast; ignore_index gives the combined frame unique
# row labels instead of repeating the labels of both inputs.
final_plot = pd.concat([plot_data, forecast_df], ignore_index=True)

In [None]:
from io import BytesIO
import matplotlib.pyplot as plt

# Maps region name -> BytesIO holding that region's chart as a PNG.
region_graphs = {}

# Rows dated before this month are plotted as actuals, the rest as forecast.
# The value is loop-invariant, so it is computed once here.
split_date = pd.to_datetime('2024-01-01')

for region in final_plot['Region'].unique():
    region_data = final_plot[final_plot['Region'] == region].copy()
    region_data['YearMonth'] = pd.to_datetime(region_data['YearMonth'])

    actual = region_data[region_data['YearMonth'] < split_date]
    forecast = region_data[region_data['YearMonth'] >= split_date]

    # Plot: solid blue for actuals, dashed red for the forecast
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.plot(actual['YearMonth'], actual['Demand Forecast'], color='blue', label='Actual')
    ax.plot(forecast['YearMonth'], forecast['Demand Forecast'], color='red', linestyle='--', label='Forecast')
    ax.legend()
    # The en dash was stored as mis-decoded UTF-8; restored here.
    ax.set_title(f"{region} - Demand Forecast (2022–2024)")
    # Use the figure's own methods so the result does not depend on which
    # figure pyplot currently considers active.
    fig.tight_layout()

    # Save to BytesIO, then rewind so readers start at the first byte
    buf = BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    buf.seek(0)

    region_graphs[region] = buf


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
from openpyxl import Workbook
from datetime import datetime

# Load the correct model
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
model_path = r"C:\Users\saksh\Desktop\Github own\Trial 2\graph_quality_final_model.h5"
model = load_model(model_path)

# Class names in the index order used at training time. flow_from_directory
# numbers the class sub-folders in alphanumeric order, so 'none' is index 2
# and 'passable' is index 3. The previous order swapped these two labels.
labels = ['bad', 'good', 'none', 'passable']

# Setup Excel: one header row, then one row per region
wb = Workbook()
ws = wb.active
ws.append(["Region", "Prediction", "Confidence", "Timestamp"])

# Predict
for region, buffer in region_graphs.items():
    # Same preprocessing as training: RGB, 128x128, pixels scaled to [0, 1]
    img = Image.open(buffer).convert('RGB').resize((128, 128))
    x = img_to_array(img) / 255.0
    x = np.expand_dims(x, axis=0)  # add the batch dimension
    pred = model.predict(x)[0]
    pred_label = labels[np.argmax(pred)]
    confidence = float(np.max(pred))
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    ws.append([region, pred_label, round(confidence, 3), timestamp])

# Save final Excel
output_path = r"C:\Users\saksh\Desktop\Github own\Trial 2\final_graph_predictions.xlsx"
wb.save(output_path)
# The check mark was stored as mis-decoded UTF-8; restored to the real glyph.
print("✅ Excel saved at:", output_path)


In [None]:

import matplotlib.pyplot as plt

# Rows dated before this month are actuals; the rest are forecast values.
split_date = pd.to_datetime('2024-01-01')

plt.figure(figsize=(14, 6))

for region in regions:
    region_data = final_plot[final_plot['Region'] == region].copy()
    region_data['YearMonth'] = pd.to_datetime(region_data['YearMonth'])

    # Create a line style column ('Actual' before split_date, else 'Forecast')
    is_actual = region_data['YearMonth'] < split_date
    region_data['Line Style'] = is_actual.map({True: 'Actual', False: 'Forecast'})

    # Sort for smoothness
    region_data.sort_values('YearMonth', inplace=True)

    # After sorting, each style forms one contiguous run, so grouping by style
    # gives the same segments as walking the rows one by one. Groups come out
    # in sorted key order: 'Actual' first, then 'Forecast'.
    for style, segment in region_data.groupby('Line Style', sort=True):
        plt.plot(segment['YearMonth'], segment['Demand Forecast'],
                 label=f"{region} - {style}",
                 linestyle='-' if style == 'Actual' else '--',
                 color='blue' if style == 'Actual' else 'red')

# The title glyphs were stored as mis-decoded UTF-8; restored to the intended
# chart emoji and en dash.
plt.title("📈 Regional Demand Forecast: Jan 2022 – Jun 2024", fontsize=16)
plt.xlabel("Month")
plt.ylabel("Total Demand Forecast")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:

import matplotlib.pyplot as plt

# Ensure proper datetime format
final_plot['YearMonth'] = pd.to_datetime(final_plot['YearMonth'])

# Rows dated before this month are actuals; the rest are forecast values.
split_date = pd.to_datetime('2024-01-01')

# Create separate plots for each region without any gap
for region in regions:
    region_data = final_plot[final_plot['Region'] == region].copy()
    region_data.sort_values('YearMonth', inplace=True)

    plt.figure(figsize=(10, 5))

    # Rows are sorted by date, so the first n_actual rows are the actuals.
    n_actual = int((region_data['YearMonth'] < split_date).sum())

    # The solid blue line runs through every actual point and on to the first
    # forecast point, so the two lines meet with no visual gap. The dashed red
    # line covers the forecast points. Each style is drawn as one line rather
    # than as one two-point segment per pair of rows.
    actual_part = region_data.iloc[:n_actual + 1]
    forecast_part = region_data.iloc[n_actual:]

    # A line needs at least two points; shorter parts are skipped.
    if n_actual > 0 and len(actual_part) > 1:
        plt.plot(actual_part['YearMonth'], actual_part['Demand Forecast'],
                 linestyle='-', color='blue')
    if len(forecast_part) > 1:
        plt.plot(forecast_part['YearMonth'], forecast_part['Demand Forecast'],
                 linestyle='--', color='red')

    # The title glyphs were stored as mis-decoded UTF-8; restored here.
    plt.title(f"📈 {region} - Demand Forecast (2022–2024)", fontsize=14)
    plt.xlabel("Month")
    plt.ylabel("Total Demand Forecast")
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()


## Inference on Live Images (In-Memory)

To simulate real-time predictions, forecast graphs are generated in memory and passed directly to the model using `BytesIO`.

This allows us to evaluate classification performance on generated visual data without writing to disk.
