In [1]:
# Google Drive API
from google.colab import drive

# Drive bağlantısını oluştur
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer


# Veri klasörlerini belirt
okra_path = "/content/gdrive/MyDrive/AliBaki_TURKOZ_Engineering_Project/Data_Sets/Raisin/Okralı/Absorbance"
normal_path = "/content/gdrive/MyDrive/AliBaki_TURKOZ_Engineering_Project/Data_Sets/Raisin/Sağlıklı/Absorbance"

# .csv dosyalarını oku ve birleştir
def read_and_combine(path, label):
    dataframes = []
    for file_name in os.listdir(path):
        if file_name.endswith(".csv"):
            file_path = os.path.join(path, file_name)
            df = pd.read_csv(file_path)
           # df = df.fillna(df.mean())  # NaN değerleri ortalama ile doldur
            df['Label'] = label
            dataframes.append(df)
    return pd.concat(dataframes, ignore_index=True)

okra_data = read_and_combine(okra_path, 'uzum_verileri_okra')
normal_data = read_and_combine(normal_path, 'uzum_verileri_normal')

# Veriyi birleştir
merged_data = pd.concat([okra_data, normal_data], ignore_index=True)

# NaN değerleri ortalamalarla doldur
#imputer = SimpleImputer(strategy='mean')
#merged_data[['Wavelength (nm)','Absorbance (AU)']] = imputer.fit_transform(merged_data[['Wavelength (nm)','Absorbance (AU)']])

# Veriyi sınıflandırma ve eğitim/test setlerine bölme
X = merged_data[['Wavelength (nm)','Absorbance (AU)']]
y = merged_data['Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sınıflandırma modeli oluştur
#clf = RandomForestClassifier(random_state=42)
clf = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None,
                                     min_samples_split=6, min_samples_leaf=6, random_state=42)
clf.fit(X_train, y_train)

# Test seti üzerinde modelin performansını değerlendir
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random_Forest_Classifier_Doğruluk Oranı: {accuracy}")


Random_Forest_Classifier_Doğruluk Oranı: 0.6391228070175439


# Kopya Dosyalar ile deneme

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Veri klasörlerini belirt
okra_path = '/content/gdrive/MyDrive/AliBaki_TURKOZ_Engineering_Project/Data_Sets/Raisin/Raisins_20Data/Number_of_scan_40 - resouliton_228/Okralı/Black_Box/Factory_Reference/Reflectance'
normal_path = '/content/gdrive/MyDrive/AliBaki_TURKOZ_Engineering_Project/Data_Sets/Raisin/Raisins_20Data/Number_of_scan_40 - resouliton_228/Normal/Black_Box/Factory_Reference/Reflectance'

# .csv dosyalarını oku ve birleştir
def read_and_combine(path, label):
    dataframes = []
    for file_name in os.listdir(path):
        try:
            file_path = os.path.join(path, file_name)
            df = pd.read_csv(file_path)
            df = df.fillna(df.mean())  # NaN değerleri ortalama ile doldur
            df['Label'] = label
            dataframes.append(df)
        except pd.errors.EmptyDataError:
            print(f"Ignoring empty CSV file: {file_name}")
        except Exception as e:
            print(f"Ignoring non-CSV file: {file_name} ({e})")
    if not dataframes:
        raise ValueError("No CSV files found in the specified directory.")
    return pd.concat(dataframes, ignore_index=True)

okra_data = read_and_combine(okra_path, 'uzum_verileri_okra')
normal_data = read_and_combine(normal_path, 'uzum_verileri_normal')

# Veriyi birleştir
merged_data = pd.concat([okra_data, normal_data], ignore_index=True)

# NaN değerleri ortalamalarla doldur
# imputer = SimpleImputer(strategy='mean')
# merged_data[['Wavelength (nm)','Absorbance (AU)']] = imputer.fit_transform(merged_data[['Wavelength (nm)','Absorbance (AU)']])

# Veriyi sınıflandırma ve eğitim/test setlerine bölme
X = merged_data[['Wavelength (nm)','Reflectance (AU)']]
y = merged_data['Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sınıflandırma modeli oluştur
clf = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None,
                             min_samples_split=6, min_samples_leaf=6, random_state=42)
clf.fit(X_train, y_train)

# Test seti üzerinde modelin performansını değerlendir
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random_Forest_Classifier_Doğruluk Oranı: {accuracy}")


Random_Forest_Classifier_Doğruluk Oranı: 0.7850877192982456
