<a href="https://colab.research.google.com/github/BrunoAFurquim/Sugarcane-Disease-Classification/blob/Exploratory-Analysis/sugarcane_disease_exploratory_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [80]:
!pip install opencv-python matplotlib numpy



In [81]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import zipfile
import seaborn as sns
import os
from google.colab import files

In [82]:
# Open the Colab upload dialog and keep its return value in `uploaded`.
# The cells below look up the archives listed in `class_zips` by relative file
# name, so those zip files are presumably the ones to upload here — TODO confirm.
from google.colab import files
uploaded = files.upload()

In [83]:
from collections import defaultdict

# Class label -> name of the zip archive that holds that class's images.
class_zips = dict(
    dried='Dried Leaves.zip',
    healthy='Healthy Leaves.zip',
    sett_rot='Sett Rot.zip',
    viral_disease='Viral Disease.zip',
)


def _new_class_record():
    """Build a fresh, empty statistics record for one class."""
    record = {
        field: []
        for field in ('heights', 'widths', 'means', 'stds', 'image_paths')
    }
    record['unique_files'] = set()
    return record


# Per-class accumulator; a record is created the first time a class is accessed.
class_stats = defaultdict(_new_class_record)

Extraindo Zips e Coletando Estatísticas

In [84]:
# Extract each class archive and collect per-image statistics into `class_stats`.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

for class_name, zip_file in class_zips.items():
    if not os.path.exists(zip_file):
        print(f"AVISO: Arquivo {zip_file} não encontrado. Pulando...")
        continue

    extract_path = f'/content/{class_name}'

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    unreadable_count = 0

    # The loop variable is called `filenames` (not `files`) so that it does not
    # overwrite the `google.colab.files` module imported earlier in the notebook.
    for root, dirs, filenames in os.walk(extract_path):
        # Sorting makes the traversal order deterministic, so image_paths[0]
        # and image_paths[:50] refer to the same images on every run.
        dirs.sort()
        for filename in sorted(filenames):
            if not filename.lower().endswith(IMAGE_EXTENSIONS):
                continue

            # Looked up only once an image file is seen, so an archive without
            # any image does not create an empty record in the defaultdict.
            stats = class_stats[class_name]

            # De-duplication is by bare file name: a name already recorded for
            # this class is skipped even if it sits in a different sub-folder.
            if filename in stats['unique_files']:
                continue

            img_path = os.path.join(root, filename)
            img = cv2.imread(img_path)
            if img is None:
                # Not decodable as an image; counted and reported below.
                unreadable_count += 1
                continue

            # Safeguard: promote a single-channel array to three channels.
            if img.ndim == 2:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

            h, w = img.shape[:2]
            # Per-channel mean / standard deviation in RGB order. The arrays are
            # stored exactly as returned; consumers reshape them as needed.
            mean, std = cv2.meanStdDev(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            stats['heights'].append(h)
            stats['widths'].append(w)
            stats['means'].append(mean)
            stats['stds'].append(std)
            stats['image_paths'].append(img_path)
            stats['unique_files'].add(filename)
            stats['channels'] = 3

    if unreadable_count:
        print(f"AVISO: {unreadable_count} arquivo(s) de imagem em {zip_file} "
              f"não puderam ser lidos e foram ignorados.")

Função para mostrar estatísticas por classe

In [85]:
def print_class_stats(stats, class_name):
    """Print image-size and RGB colour statistics for one class.

    Args:
        stats: Mapping from class name to a record with the keys
            'unique_files', 'heights', 'widths', 'means' and 'stds'.
        class_name: Key of the class to report.

    The height/width lines show the mean ± standard deviation across images.
    The colour lines show, per channel, the mean of the per-image means ± the
    mean of the per-image standard deviations.

    A class without any measured image gets a short notice instead of the
    numeric lines, which would otherwise print "nan" and emit numpy warnings.
    """
    data = stats[class_name]
    print(f"\n{'='*50}")
    print(f"Análise da classe: {class_name.upper()}")
    print(f"{'='*50}")
    print(f"Total de imagens: {len(data['unique_files'])}")

    if not data['heights']:
        print("Nenhuma imagem válida para calcular estatísticas.")
        return

    print(f"Altura média: {np.mean(data['heights']):.2f} ± {np.std(data['heights']):.2f} px")
    print(f"Largura média: {np.mean(data['widths']):.2f} ± {np.std(data['widths']):.2f} px")

    if data['means']:
        # One row per image, one column per channel (R, G, B).
        means_stack = np.vstack([m.reshape(-1, 3) for m in data['means']])
        stds_stack = np.vstack([s.reshape(-1, 3) for s in data['stds']])

        print("\nMédia de cores (RGB):")
        for i, color in enumerate(['Red', 'Green', 'Blue']):
            print(f"{color}: {np.mean(means_stack[:,i]):.2f} ± {np.mean(stds_stack[:,i]):.2f}")

# Report every configured class for which statistics were collected.
for class_name in class_zips:
    if class_name not in class_stats:
        continue
    print_class_stats(class_stats, class_name)



Análise da classe: DRIED
Total de imagens: 343
Altura média: 1024.00 ± 0.00 px
Largura média: 768.00 ± 0.00 px

Média de cores (RGB):
Red: 166.78 ± 26.52
Green: 162.76 ± 28.13
Blue: 156.32 ± 30.24

Análise da classe: HEALTHY
Total de imagens: 430
Altura média: 1008.37 ± 82.20 px
Largura média: 768.00 ± 0.00 px

Média de cores (RGB):
Red: 99.66 ± 43.81
Green: 119.20 ± 47.32
Blue: 96.39 ± 49.43

Análise da classe: SETT_ROT
Total de imagens: 652
Altura média: 1024.00 ± 0.00 px
Largura média: 768.00 ± 0.00 px

Média de cores (RGB):
Red: 128.70 ± 48.02
Green: 119.97 ± 49.76
Blue: 113.28 ± 45.96

Análise da classe: VIRAL_DISEASE
Total de imagens: 663
Altura média: 950.35 ± 166.05 px
Largura média: 768.00 ± 0.00 px

Média de cores (RGB):
Red: 158.54 ± 35.23
Green: 161.26 ± 30.45
Blue: 156.83 ± 38.94


Boxplot de dimensões

In [None]:
import plotly.express as px

# Show the first collected image of every class. The grid is sized from the
# number of classes instead of a fixed 2x2 layout, so more than four classes
# still fit.
n_cols = 2
n_rows = max(1, (len(class_stats) + n_cols - 1) // n_cols)

plt.figure(figsize=(15, 4 * n_rows))
for i, class_name in enumerate(class_stats, 1):
    if not class_stats[class_name]['image_paths']:
        print(f"Warning: No images found for class {class_name}")
        continue

    img_path = class_stats[class_name]['image_paths'][0]

    img = cv2.imread(img_path)
    if img is None:
        print(f"Warning: Failed to load {img_path}")
        continue

    # Prefer the channel count recorded during collection; otherwise derive it
    # from the shape of the loaded array.
    channels = class_stats[class_name].get('channels',
                                         img.shape[2] if len(img.shape) == 3 else 1)

    plt.subplot(n_rows, n_cols, i)
    if channels == 1:
        plt.imshow(img, cmap='gray')
    else:
        # The image is loaded in BGR order; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.title(f'Exemplo: {class_name}')
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import plotly.express as px

# Pie chart of how many collected images each class has.
class_labels = list(class_stats.keys())
counts = [len(class_stats[label]['image_paths']) for label in class_labels]
fig = px.pie(
    names=class_labels,
    values=counts,
    title='Distribuição de Classes',
)
fig.show()

In [None]:
# Box plot of image heights per class. It uses the whole figure: this cell
# draws a single plot, so reserving one slot of a 1x2 grid left half of the
# figure blank.
plt.figure(figsize=(8, 6))
all_heights = [class_stats[cls]['heights'] for cls in class_stats]
bp = plt.boxplot(all_heights, tick_labels=list(class_stats.keys()), patch_artist=True)

# Colours are cycled so every box is filled even with more classes than colours.
colors = ['lightgreen', 'lightblue', 'salmon', 'gold']
for box_idx, patch in enumerate(bp['boxes']):
    patch.set_facecolor(colors[box_idx % len(colors)])

plt.title('Distribuição de Alturas por Classe\n(com outliers)', fontsize=12)
plt.ylabel('Pixels')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

In [None]:
# Box plots of the per-image mean intensity, grouped by class and split by RGB
# channel. The plot uses the whole figure, since this cell draws a single plot.
plt.figure(figsize=(12, 6))

intensities_rgb = {}
for cls in class_stats:
    # Reshape the stored per-image means to one row per image with columns
    # (R, G, B). np.concatenate would stack the stored per-image arrays into a
    # single column and mix the three channels together.
    means = np.asarray(class_stats[cls]['means'], dtype=float).reshape(-1, 3)
    intensities_rgb[cls] = {'R': means[:, 0], 'G': means[:, 1], 'B': means[:, 2]}

colors = ['red', 'green', 'blue']
box_width = 0.15
spacing = 0.2
class_positions = np.arange(len(class_stats))

for i, channel in enumerate(['R', 'G', 'B']):
    data = [intensities_rgb[cls][channel] for cls in class_stats]
    # Shift each channel sideways so the three boxes of a class sit next to each other.
    positions = class_positions + i * spacing

    bp = plt.boxplot(data, positions=positions, widths=box_width,
                    patch_artist=True,
                    tick_labels=[cls if i == 1 else '' for cls in class_stats])

    for box in bp['boxes']:
        box.set_facecolor(colors[i])

plt.title('Intensidade por Canal RGB e Classe', fontsize=12)
plt.ylabel('Intensidade Média')
# One tick per class, centred on the middle (green) box of its group.
plt.xticks(class_positions + spacing, list(class_stats.keys()), rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)

legend_patches = [plt.Rectangle((0,0), 1, 1, fc=color)
                 for color in colors]
plt.legend(legend_patches, ['Red', 'Green', 'Blue'], loc='upper right')

plt.tight_layout()
plt.show()

# Análises de Distribuição de Canais RGB

In [None]:
# Kernel density estimates of the per-image mean intensity of each RGB channel,
# one panel per class. The grid is sized from the number of classes.
n_cols = 2
n_rows = max(1, (len(class_stats) + n_cols - 1) // n_cols)

plt.figure(figsize=(15, 5 * n_rows))
for i, cls in enumerate(class_stats.keys()):
    # Reshape the stored per-image means to one row per image with columns
    # (R, G, B). np.concatenate would stack the stored per-image arrays into a
    # single column, so the channels could never be plotted separately.
    means = np.asarray(class_stats[cls]['means'], dtype=float).reshape(-1, 3)

    plt.subplot(n_rows, n_cols, i+1)

    # A class without measured images keeps an empty, titled panel.
    if len(means) > 0:
        sns.kdeplot(means[:, 0], label='Red', color='r')
        sns.kdeplot(means[:, 1], label='Green', color='g')
        sns.kdeplot(means[:, 2], label='Blue', color='b')
        plt.legend()

    plt.title(f'Distribuição de Cores - {cls}')
    plt.xlabel('Intensidade do Pixel')
    plt.ylabel('Densidade')

plt.tight_layout()
plt.show()

In [None]:
# Per-channel pixel histograms of the first collected image of each class.
plt.figure(figsize=(12,8))
for plot_idx, (cls, cls_data) in enumerate(class_stats.items(), start=1):
    first_img_bgr = cv2.imread(cls_data['image_paths'][0])
    first_img_rgb = cv2.cvtColor(first_img_bgr, cv2.COLOR_BGR2RGB)

    plt.subplot(2, 2, plot_idx)
    # Channel index 0/1/2 of the RGB image is drawn in red/green/blue.
    for channel, line_color in zip(range(3), 'rgb'):
        channel_hist = cv2.calcHist([first_img_rgb], [channel], None, [256], [0,256])
        plt.plot(channel_hist, color=line_color)
    plt.title(f'Histograma - {cls}')
plt.tight_layout()

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# One record per image holding its class and its mean R, G and B intensity.
# The stored per-image means are reshaped to rows of (R, G, B); np.concatenate
# would stack them into a single column, giving every record R == G == B.
data = [
    {'Class': cls, 'R': red, 'G': green, 'B': blue}
    for cls in class_stats
    for red, green, blue in np.asarray(class_stats[cls]['means'], dtype=float).reshape(-1, 3)
]

df = pd.DataFrame(data)

g = sns.pairplot(df, hue='Class', palette='viridis')
g.fig.suptitle("Distribuição das Intensidades de Cor por Canal RGB", y=1.02)

plt.show()

# Análise de Textura

In [None]:
from skimage.feature import graycomatrix, graycoprops

def calc_texture(img):
    """Return the GLCM contrast and energy of an image.

    The image is converted from BGR to grayscale, a 256-level co-occurrence
    matrix is built for distance 1 and angle 0, and the 'contrast' and
    'energy' properties of that matrix are returned as a 2-tuple.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cooccurrence = graycomatrix(grayscale, distances=[1], angles=[0], levels=256)
    contrast_value = graycoprops(cooccurrence, 'contrast')[0,0]
    energy_value = graycoprops(cooccurrence, 'energy')[0,0]
    return contrast_value, energy_value

# Apply the texture measurement to the first image of each class
for cls, cls_data in class_stats.items():
    first_image_path = cls_data['image_paths'][0]
    img = cv2.imread(first_image_path)
    contrast, energy = calc_texture(img)
    print(f"{cls}: Contraste={contrast:.2f}, Energia={energy:.2f}")

# Análise de Bordas

In [None]:
def analyze_edges(img_path, low_threshold=100, high_threshold=200):
    """Return the fraction of pixels that Canny marks as edges.

    Args:
        img_path: Path of the image file; it is loaded as grayscale.
        low_threshold: Lower hysteresis threshold passed to cv2.Canny.
        high_threshold: Upper hysteresis threshold passed to cv2.Canny.

    Returns:
        Number of non-zero pixels in the edge map divided by its total size.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Fail with a clear message instead of an opaque error inside cv2.Canny.
        raise ValueError(f"Não foi possível ler a imagem: {img_path}")
    edges = cv2.Canny(img, low_threshold, high_threshold)
    edge_pixels = np.sum(edges > 0) / edges.size
    return edge_pixels

# Compute the edge density over the first 50 images of each class
for cls in class_stats:
    sample_paths = class_stats[cls]['image_paths'][:50]
    edge_density = list(map(analyze_edges, sample_paths))
    print(f"{cls}: Densidade média de bordas = {np.mean(edge_density):.2%}")

# Análise de Contraste

In [None]:
import pandas as pd

def calc_brightness_contrast(img):
    """Return (mean, standard deviation) of the L channel of an image.

    The image is converted from BGR to LAB; the mean of the first (L) channel
    is returned as brightness and its standard deviation as contrast.
    """
    lab_image = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness = lab_image[:, :, 0]
    brightness = np.mean(lightness)
    contrast = np.std(lightness)
    return brightness, contrast

# Brightness and contrast of the first image of each class, shown as a bar chart.
results = {}
for cls, cls_data in class_stats.items():
    img = cv2.imread(cls_data['image_paths'][0])
    brightness, contrast = calc_brightness_contrast(img)
    results[cls] = dict(Brilho=brightness, Contraste=contrast)

# Rows are classes, columns are the two measurements.
pd.DataFrame.from_dict(results, orient='index').plot.bar()