<a href="https://colab.research.google.com/github/Pujitha2005/Fruit/blob/main/FruitQuality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# each archive is unpacked under KAGGLE_INPUT_PATH/<directory>.
# NOTE(review): the URL carries X-Goog-Date / X-Goog-Expires query parameters,
# so it is presumably a time-limited signed URL — regenerate it if the download fails.
DATA_SOURCE_MAPPING = 'aids-data-quest-society:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F70769%2F7731816%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240331%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240331T114013Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3db702c3c1b9be9da8f3835d7734d938e4a7064405f0a9dc0dee9cc2c5af4ed56c3c76d62b9108746f02b4bea8b2c0feb5fb506a1f1f0b21484b3df7a62d3aa4aa6dd1bc011bc842dc7e4ff1c2805ce1cbcce3a3a7feddc12a4e1b0892564e9666c3df149b47b0d8b0c141cf29ecd5074fdc21f8519e00f756e6ef1a6b1418f9c02b45d7ef4d5154cd1c6d4af4194415ccf4eb0c9a4afec51ca77803f7d966378469498fd194efabaef30140542d771085e7d0106c1b55250c447b58d4904209bb197e38c6a0dc3b0d2b5e232cd39a47551c78d33050e8eb64044e19f06841dc55ae62cbd0bacbc88123181e5c375bf9cd5f8fba2585618503b1e0ec04097878'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every archive listed in DATA_SOURCE_MAPPING and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A failed download is reported and skipped so
# the remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<percent-encoded URL>". Split on the first
    # ':' only, so an unencoded colon in the URL part cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional (e.g. chunked responses); without it
            # the byte counter is still shown but the bar cannot advance.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length > 0 else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # tarfile reopens the temp file by name, so buffered bytes must be
            # on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would
                # shadow the module and break the next loop iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras import layers
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training set; the first CSV column becomes the row index.
# NOTE(review): this reads from /content rather than the /kaggle/input tree
# populated earlier in the notebook — confirm the file is uploaded there.
train_df = pd.read_csv('/content/train_ds.csv', index_col=0)
# Drop incomplete rows before converting types.
train_df.dropna(inplace=True)
train_df['Acidity'] = train_df['Acidity'].astype(float)
# Encode the label as 1 ("good") / 0 ("bad"); any other value becomes NaN.
train_df['Quality'] = train_df['Quality'].map({"good": 1, "bad": 0})
# One histogram per column.
plt.rcParams['figure.figsize'] = [20, 10]
train_df.hist()
# plt.show must be called; the bare attribute reference never shows the figure.
plt.show()

In [None]:
# One kernel-density subplot per column, all sharing the x axis.
density_figsize = [20, 30]
plt.rcParams['figure.figsize'] = density_figsize
train_df.plot(
    kind='density',
    subplots=True,
    sharex=True,
)
plt.show()

In [None]:
# Every column except the last is treated as a feature and rescaled to
# [0, 1]; the fitted scaler is reused later for the test set.
feature_columns = train_df.iloc[:, :-1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train = scaler.fit_transform(feature_columns)
x_train = scaled_train
# Labels come from the 'Quality' column.
y_train = train_df['Quality'].values
# Number of features per sample, in the list form handed to the first layer.
n_features = x_train.shape[1]
input_shape = [n_features]

In [None]:
# Binary classifier: input normalisation, two Dense -> BatchNorm -> Dropout
# stages (256 and 128 units), a 64-unit Dense layer and a sigmoid output.
network_layers = [layers.BatchNormalization(input_shape=input_shape)]
for hidden_units in (256, 128):
    network_layers.extend([
        layers.Dense(units=hidden_units, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
    ])
network_layers.append(layers.Dense(units=64, activation='relu'))
network_layers.append(layers.Dense(units=1, activation='sigmoid'))

model = keras.Sequential(network_layers)
model.summary()

In [None]:
# Compile and train the binary classifier on the scaled training features.
model.summary()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # Keras expects a list of metrics; a bare string is rejected by newer versions.
    metrics=['binary_accuracy'],
)
history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=100,
    callbacks=[
        keras.callbacks.EarlyStopping(
            # No validation data is passed to fit(), so the default monitor
            # ('val_loss') is never available; track the training loss so
            # early stopping and restore_best_weights take effect.
            # NOTE(review): a held-out validation split would be a better
            # stopping signal than the training loss.
            monitor='loss',
            patience=10,
            min_delta=0.001,
            restore_best_weights=True,
        )
    ],
)

In [None]:
# Load the test set. Unlike the training CSV, no index_col is given, so every
# CSV column (including 'ID', used for the submission) stays a regular column.
test_df = pd.read_csv('/content/test_ds.csv')


In [None]:
# Score the test set with the scaler and model fitted on the training data,
# then write the ID / Quality pairs to submission.csv.
test_df.dropna(inplace=True)
test_features = test_df.iloc[:, 1:]  # everything after the first column
scaled_test = scaler.transform(test_features)

# Round the model outputs to hard 0/1 classes.
predicted_classes = np.round(model.predict(scaled_test)).astype(int)

# Translate numeric classes back to the original string labels.
submission_labels = {1: 'good', 0: 'bad'}
submission_predictions = [
    submission_labels[class_id] for class_id in predicted_classes[:, 0]
]

submission_df = pd.DataFrame({
    'ID': test_df['ID'],
    'Quality': submission_predictions,
})
submission_df.to_csv('submission.csv', index=False)
submission_df

In [None]:
from sklearn.metrics import accuracy_score, mean_absolute_error, precision_score, classification_report
# Predict on the training features and round the outputs to integer classes.
train_scores = model.predict(x_train)
y_pred_train = np.round(train_scores).astype(int)

In [None]:
# Accuracy of the rounded predictions against the training labels.
acc = accuracy_score(y_true=y_train, y_pred=y_pred_train)
acc

In [None]:
# Precision of the rounded predictions against the training labels.
prec = precision_score(y_true=y_train, y_pred=y_pred_train)
prec

In [None]:
# Mean absolute error between the training labels and the rounded predictions.
mae = mean_absolute_error(y_true=y_train, y_pred=y_pred_train)
mae

In [None]:
# Text report of the per-class metrics on the training data.
report = classification_report(y_train, y_pred_train)
# Print rather than echo the string: the bare expression shows its repr,
# with newlines escaped as "\n", which makes the table unreadable.
print(report)

In [None]:
# Save the training-set metrics as plain text in the Kaggle working directory.
# NOTE(review): the content is free-form text although the file name ends in
# .csv; the path is kept unchanged so anything reading it still finds it.
output_file_path = '/kaggle/working/classification_report.csv'
with open(output_file_path, 'w') as f:
    f.write(f"Accuracy: {acc}\n")
    # The trailing newline keeps this line from running into the header below.
    f.write(f"Mean Absolute Error: {mae}\n")
    f.write("Classification Report:\n")
    f.write(report)
output_file_path