In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# Directories of the three dataset splits (relative paths, resolved against the working directory).
data_train_path, data_test_path, data_val_path = (
    'Fruits_Vegetables/train',
    'Fruits_Vegetables/test',
    'Fruits_Vegetables/validation',
)

In [None]:
# Target image size in pixels; used by the dataset loaders and when loading a single image for prediction.
img_height, img_width = 180, 180

In [None]:
# Training split, loaded in shuffled batches of 32.
# Keras expects `image_size` as (height, width); this order also matches the
# validation and test loaders. `validation_split` is an optional float, so
# "no split" is expressed as None rather than False.
data_train = tf.keras.utils.image_dataset_from_directory(
    data_train_path,
    shuffle=True,
    image_size=(img_height, img_width),
    batch_size=32,
    validation_split=None)

In [None]:
# Class labels reported by the training loader; reused for plot titles,
# the size of the model's output layer and the prediction lookup.
data_cat = data_train.class_names

In [None]:
# Show the class labels as the cell output for a quick visual check.
data_cat

In [None]:
# Persist the class labels to class_names.txt, one label per line
with open('class_names.txt', 'w') as names_file:
    names_file.writelines(label + '\n' for label in data_cat)

In [None]:
# Validation split: same image size and batch size as training, loaded without shuffling.
val_loader_kwargs = {
    'image_size': (img_height, img_width),
    'batch_size': 32,
    'shuffle': False,
    'validation_split': False,
}
data_val = tf.keras.utils.image_dataset_from_directory(data_val_path, **val_loader_kwargs)

In [None]:
# Test split: same image size and batch size as training, loaded without shuffling.
data_test = tf.keras.utils.image_dataset_from_directory(
    directory=data_test_path,
    batch_size=32,
    image_size=(img_height, img_width),
    shuffle=False,
    validation_split=False,
)

In [None]:
# Preview the first nine images of one training batch in a 3x3 grid, titled with their class label.
fig = plt.figure(figsize=(10, 10))
for batch_images, batch_labels in data_train.take(1):
    for idx in range(9):
        ax = fig.add_subplot(3, 3, idx + 1)
        pixels = batch_images[idx].numpy().astype('uint8')
        ax.imshow(pixels)
        ax.set_title(data_cat[batch_labels[idx]])
        ax.axis('off')

In [None]:
from tensorflow.keras.models import Sequential

In [None]:
# Show the training dataset object's repr as the cell output.
data_train

In [None]:
# Small CNN classifier:
#   - Rescaling multiplies pixel values by 1/255 before the convolutions.
#   - Three Conv2D + MaxPooling2D stages with 16, 32 and 64 filters.
#   - Flatten -> Dropout(0.2) -> Dense(128) hidden layer -> one output unit per class.
# The hidden Dense layer needs a non-linear activation: without it, the two
# consecutive Dense layers are equivalent to a single linear layer.
# The final layer has no activation, so the model outputs raw logits (the loss
# is configured with from_logits=True and softmax is applied at prediction time).
model = Sequential([
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(data_cat)),
])

In [None]:
# The loss is built with from_logits=True, i.e. it consumes the model's raw (un-normalised) outputs.
logit_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer='adam',
    loss=logit_loss,
    metrics=['accuracy'],
)

In [None]:
# Train for a fixed number of epochs, passing the validation split so that
# validation metrics are recorded in `history` alongside the training ones.
epochs_size = 25
history = model.fit(
    x=data_train,
    validation_data=data_val,
    epochs=epochs_size,
)

In [None]:
# Plot training vs. validation accuracy and loss per epoch, side by side.
epochs_range = range(epochs_size)
plt.figure(figsize=(8,8))

plt.subplot(1,2,1)
plt.plot(epochs_range, history.history['accuracy'], label='Training Accuracy')
plt.plot(epochs_range, history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The line labels are only rendered once a legend is requested.
plt.legend(loc='lower right')
plt.title('Accuracy')

plt.subplot(1,2,2)
plt.plot(epochs_range, history.history['loss'], label='Training Loss')
plt.plot(epochs_range, history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Loss')

plt.tight_layout()
plt.show()

In [None]:
# Load one image from disk, resized to the size the model was trained on,
# and turn it into a batch of one for `model.predict`.
image_path = 'corn.jpg'
image = tf.keras.utils.load_img(image_path, target_size=(img_height, img_width))
# load_img returns a PIL image; img_to_array converts it into a numeric array
# (array_to_img goes the opposite direction and would hand back a PIL image).
img_arr = tf.keras.utils.img_to_array(image)
# Add a leading batch axis.
img_bat = tf.expand_dims(img_arr, 0)

In [None]:
# Run the trained model on the one-image batch built above.
predict = model.predict(img_bat)

In [None]:
# The model's last layer has no activation, so its outputs are logits;
# softmax turns them into per-class probabilities.
score = tf.nn.softmax(predict)

In [None]:
# Report the most probable class. The number shown is the softmax probability
# of that class for this single image (a confidence), not a model accuracy.
print('Veg/Fruit in image is {} with confidence of {:0.2f}%'.format(data_cat[np.argmax(score)], np.max(score) * 100))

In [None]:
# Destination of the trained model. Set the IMAGE_CLASSIFY_MODEL_PATH environment
# variable to save somewhere else without editing the notebook; when it is unset,
# the original hard-coded location is used.
model_save_path = os.environ.get(
    'IMAGE_CLASSIFY_MODEL_PATH',
    'C:/Users/Ragib/Desktop/Temp_arif/Image_classify.h5',
)
model.save(model_save_path)

In [None]:
# Import additional libraries needed for analysis
import os
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

# Function to analyze the image dataset
def analyze_image_dataset(base_dir, class_names):
    """Collect per-class file statistics for a dataset laid out as ``base_dir/<class_name>/``.

    Only files whose names end in .png, .jpg, .jpeg, .bmp or .gif
    (case-insensitive) are counted.

    Args:
        base_dir: Directory containing one sub-directory per class.
        class_names: Iterable of class (sub-directory) names to inspect.

    Returns:
        A list with one dict per entry of ``class_names``, in the same order,
        with the keys ``class_name``, ``num_files``, ``file_size_min_kb``,
        ``file_size_max_kb``, ``file_size_avg_kb`` (sizes rounded to 2 decimals),
        ``min_resolution`` / ``max_resolution`` (the ``(width, height)`` of the
        image with the smallest / largest pixel area) and ``sample_files``
        (up to three file names, in sorted order).
        A class with no matching files, or whose directory does not exist,
        gets zero sizes, ``(0, 0)`` resolutions and an empty sample list.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    results = []

    for class_name in class_names:
        class_path = os.path.join(base_dir, class_name)

        # A split may lack some classes; report such a class as empty instead
        # of aborting the whole analysis with FileNotFoundError.
        if os.path.isdir(class_path):
            # os.listdir returns entries in arbitrary order; sorting keeps
            # `sample_files` reproducible between runs and machines.
            image_files = sorted(
                name for name in os.listdir(class_path)
                if name.lower().endswith(image_extensions)
            )
        else:
            image_files = []

        file_sizes = []
        resolutions = []
        for img_file in image_files:
            img_path = os.path.join(class_path, img_file)

            # File size in KB
            file_sizes.append(os.path.getsize(img_path) / 1024)

            # Image resolution as (width, height)
            with Image.open(img_path) as img:
                width, height = img.size
                resolutions.append((width, height))

        # Key used to rank resolutions: total number of pixels.
        def pixel_area(resolution):
            return resolution[0] * resolution[1]

        results.append({
            'class_name': class_name,
            'num_files': len(image_files),
            'file_size_min_kb': round(min(file_sizes), 2) if file_sizes else 0,
            'file_size_max_kb': round(max(file_sizes), 2) if file_sizes else 0,
            'file_size_avg_kb': round(float(np.mean(file_sizes)), 2) if file_sizes else 0,
            'min_resolution': min(resolutions, key=pixel_area) if resolutions else (0, 0),
            'max_resolution': max(resolutions, key=pixel_area) if resolutions else (0, 0),
            # Slicing already copes with fewer than three files.
            'sample_files': image_files[:3],
        })

    return results

# Split name -> directory of that split
datasets = dict(
    train=data_train_path,
    validation=data_val_path,
    test=data_test_path,
)

all_results = {}

# Columns shown in the on-screen summary table
summary_columns = ['class_name', 'num_files', 'file_size_min_kb',
                   'file_size_max_kb', 'file_size_avg_kb',
                   'min_resolution_str', 'max_resolution_str']

# For every split: analyse, tabulate, export to CSV and chart the class sizes
for dataset_name, dataset_path in datasets.items():
    print(f"Analyzing {dataset_name} dataset...")
    results = analyze_image_dataset(dataset_path, data_cat)
    all_results[dataset_name] = results

    # One row per class, plus "WIDTHxHEIGHT" text versions of the resolution tuples
    df = pd.DataFrame(results)
    for bound in ('min', 'max'):
        df[f'{bound}_resolution_str'] = [
            f"{res[0]}x{res[1]}" for res in df[f'{bound}_resolution']
        ]

    print(f"\n{dataset_name.upper()} DATASET SUMMARY:")
    display(df[summary_columns])

    # CSV export of the full table (all columns)
    df.to_csv(f'{dataset_name}_dataset_analysis.csv', index=False)

    # Bar chart of image counts per class
    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(df['class_name'], df['num_files'], color='skyblue')
    ax.set_title(f'Number of Images per Class - {dataset_name.capitalize()} Dataset')
    ax.set_xlabel('Class')
    ax.set_ylabel('Number of Images')
    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()

    # Write each bar's count just above it
    for bar in bars:
        count = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., count + 0.1,
                f'{int(count)}', ha='center', va='bottom')

    plt.show()

# Write a plain-text report with one section per split and one entry per class
with open('complete_dataset_report.txt', 'w') as report:
    report.writelines([
        "COMPLETE DATASET ANALYSIS REPORT\n",
        "===============================\n\n",
    ])

    for dataset_name, results in all_results.items():
        report.write(f"{dataset_name.upper()} DATASET\n")
        report.write("-" * 20 + "\n\n")

        for item in results:
            min_res = f"{item['min_resolution'][0]}x{item['min_resolution'][1]}"
            max_res = f"{item['max_resolution'][0]}x{item['max_resolution'][1]}"
            report.writelines([
                f"CLASS: {item['class_name']}\n",
                f"Number of files: {item['num_files']}\n",
                f"File size range: {item['file_size_min_kb']}KB to {item['file_size_max_kb']}KB (avg: {item['file_size_avg_kb']}KB)\n",
                f"Resolution range: {min_res} to {max_res}\n",
                f"Sample files: {', '.join(str(x) for x in item['sample_files'])}\n\n",
            ])

        report.write("\n" + "="*50 + "\n\n")

print(f"\nAnalysis complete! Comprehensive results saved to CSV files and 'complete_dataset_report.txt'")