In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn import model_selection as ms
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn import linear_model, naive_bayes, neighbors, svm, tree, ensemble, neural_network, discriminant_analysis

d_test = pd.read_excel(r"E:\DeepLearning\agriculture\test_agriculture.xlsx")
d_train = pd.read_csv(r"E:\DeepLearning\agriculture\train_agriculture.csv")

imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')

x = d_train.iloc[:,1:-1]
y = d_train.iloc[:,-1]

In [4]:
d_test

Unnamed: 0,ID,Estimated_Insects_Count,Crop_Type,Soil_Type,Pesticide_Use_Category,Number_Doses_Week,Number_Weeks_Used,Number_Weeks_Quit,Season
0,F00000002,188,1,1,1,0,,0,2
1,F00000007,410,1,1,1,0,0.0,0,2
2,F00000011,626,1,0,1,0,0.0,0,2
3,F00000013,731,1,0,1,0,0.0,0,2
4,F00000014,789,0,0,1,0,0.0,0,1
...,...,...,...,...,...,...,...,...,...
1194,F00003133,1132,0,1,2,40,34.0,7,3
1195,F00003135,1132,0,1,2,40,25.0,3,1
1196,F00003138,1132,0,1,2,20,13.0,23,1
1197,F00003149,1212,0,1,2,10,35.0,5,1


In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report

X, y = make_classification(n_samples=10000, random_state=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=1)

clf = MLPClassifier(
    random_state=1,
    max_iter=500,
    hidden_layer_sizes=(200, 100), 
    activation='relu',
    alpha=0.001 
)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.94      0.93      1247
           1       0.94      0.92      0.93      1253

    accuracy                           0.93      2500
   macro avg       0.93      0.93      0.93      2500
weighted avg       0.93      0.93      0.93      2500



In [6]:
from sklearn.metrics import f1_score
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

hidden_layer_configs = {
    '1 Lớp ẩn': (100,),
    '2 Lớp ẩn': (100, 50),
    '3 Lớp ẩn (100, 50, 25 neurons)': (100, 50, 25)
}


for name, config in hidden_layer_configs.items():
    print(f"{name} ---")
    
    clf = MLPClassifier(hidden_layer_sizes=config, max_iter=500, random_state=42)
    

    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_test)
    
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"F1-score: {f1:.4f}\n")

1 Lớp ẩn ---




F1-score: 0.8161

2 Lớp ẩn ---
F1-score: 0.8162

3 Lớp ẩn (100, 50, 25 neurons) ---
F1-score: 0.8322



# Layer với Pipline

In [7]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipeline_mlp = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(hidden_layer_sizes=(100, 50, 25), max_iter=500, random_state=42))
])

pipeline_mlp.fit(X_train, y_train)

y_pred = pipeline_mlp.predict(X_test)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"{f1:.4f}")

0.8361


# So sánh với các model khác

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score

from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

models = {
    'MLPClassifier': MLPClassifier(max_iter=500, random_state=42),
    'LogisticRegression': LogisticRegression(random_state=42),
    'GaussianNB': GaussianNB(),
    'KNeighborsClassifier': KNeighborsClassifier(),
    'SVC': SVC(random_state=42),
    'DecisionTreeClassifier': DecisionTreeClassifier(random_state=42),
    'RandomForestClassifier': RandomForestClassifier(random_state=42),
    'GradientBoostingClassifier': GradientBoostingClassifier(random_state=42),
    'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis()
}


for name, model in models.items():
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"Mô hình: {name}:====>>F1-score: {f1:.4f} <<====")



Mô hình: MLPClassifier:====>>F1-score: 0.8161 <<====
Mô hình: LogisticRegression:====>>F1-score: 0.8522 <<====
Mô hình: GaussianNB:====>>F1-score: 0.7954 <<====
Mô hình: KNeighborsClassifier:====>>F1-score: 0.8079 <<====
Mô hình: SVC:====>>F1-score: 0.8442 <<====
Mô hình: DecisionTreeClassifier:====>>F1-score: 0.8402 <<====
Mô hình: RandomForestClassifier:====>>F1-score: 0.8802 <<====
Mô hình: GradientBoostingClassifier:====>>F1-score: 0.8802 <<====
Mô hình: LinearDiscriminantAnalysis:====>>F1-score: 0.8440 <<====


# BernoulliRBM

In [9]:
import pandas as np
from sklearn.neural_network import BernoulliRBM

In [10]:
X, y = make_classification(n_samples=10000, n_features=50, n_informative=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [12]:
from sklearn.metrics import accuracy_score
X, y = make_classification(n_samples=10000, n_features=50, n_informative=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

pipeline_rbm = Pipeline(steps=[
    ('rbm', BernoulliRBM(n_components=30, learning_rate=0.005, batch_size=100, n_iter=500, random_state=42)),
    ('classifier', LogisticRegression(random_state=42)) # Added a classifier as the last step
])

pipeline_rbm.fit(X_train, y_train)

y_pred = pipeline_rbm.predict(X_test)

# Calculate and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.6028


In [None]:
pipeline = Pipeline(steps=[
    ('rbm', BernoulliRBM(random_state=42))
])

param_grid = {
    'rbm__n_components': [50, 100, 150],
    'rbm__learning_rate': [0.01, 0.05, 0.1],
    'rbm__n_iter': [100, 200, 500]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='f1_weighted', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Các tham số tốt nhất:", grid_search.best_params_)
print("F1-score tốt nhất:", grid_search.best_score_)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [None]:
best_pipeline = grid_search.best_estimator_
y_pred = best_pipeline.predict(X_test)
f1_final = f1_score(y_test, y_pred, average='weighted')

print("F1-score cuối cùng trên tập kiểm tra:", f1_final)

In [13]:
rbm = BernoulliRBM(n_components=30, n_iter=50, random_state=42)
rbm.fit(X_train)

print("Các tham số của BernoulliRBM ")

Các tham số của BernoulliRBM 


#  Hiển thị các tham số

In [21]:
print("2 LỚP ẨN")
clf_2_layers = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
clf_2_layers.fit(X_train, y_train)

print("\nKích thước weights:")
for i, layer_weights in enumerate(clf_2_layers.coefs_):
    print(f"  - Lớp {i+1}: {layer_weights.shape}")

print("\nKích thước biases:")
for i, layer_biases in enumerate(clf_2_layers.intercepts_):
    print(f"  - Lớp {i+1}: {layer_biases.shape}")

print("\n" + "="*50 + "\n")

# 3 lớp ẩn 
print("THAM SỐ VỚI 3 LỚP ẨN")
clf_3_layers = MLPClassifier(hidden_layer_sizes=(100, 50, 25), max_iter=500, random_state=42)
clf_3_layers.fit(X_train, y_train)


print("weights:")
for i, layer_weights in enumerate(clf_3_layers.coefs_):
    print(f"  - Lớp {i+1}: {layer_weights.shape}")

print("biases:")
for i, layer_biases in enumerate(clf_3_layers.intercepts_):
    print(f"  - Lớp {i+1}: {layer_biases.shape}")


2 LỚP ẨN

Kích thước weights:
  - Lớp 1: (50, 100)
  - Lớp 2: (100, 50)
  - Lớp 3: (50, 1)

Kích thước biases:
  - Lớp 1: (100,)
  - Lớp 2: (50,)
  - Lớp 3: (1,)


THAM SỐ VỚI 3 LỚP ẨN
weights:
  - Lớp 1: (50, 100)
  - Lớp 2: (100, 50)
  - Lớp 3: (50, 25)
  - Lớp 4: (25, 1)
biases:
  - Lớp 1: (100,)
  - Lớp 2: (50,)
  - Lớp 3: (25,)
  - Lớp 4: (1,)


In [23]:
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=50, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print("2 LỚP ẨN ")
clf_2_layers = MLPClassifier(hidden_layer_sizes=(100, 50), alpha=0.01, max_iter=500, random_state=42)
clf_2_layers.fit(X_train, y_train)

print("\nKích thước weights:")
for i, layer_weights in enumerate(clf_2_layers.coefs_):
    print(f"  - Lớp {i+1}: {layer_weights.shape}")

print("\nKích thước biases:")
for i, layer_biases in enumerate(clf_2_layers.intercepts_):
    print(f"  - Lớp {i+1}: {layer_biases.shape}")

print("\n" + "="*50 + "\n")

#3 LỚP ẨN
print("THAM SỐ VỚI 3 LỚP ẨN ")

clf_3_layers = MLPClassifier(hidden_layer_sizes=(100, 50, 25), alpha=0.01, max_iter=500, random_state=42)
clf_3_layers.fit(X_train, y_train)


print("\nKích thước weights:")
for i, layer_weights in enumerate(clf_3_layers.coefs_):
    print(f"  - Lớp {i+1}: {layer_weights.shape}")

print("\nKích thước biases:")
for i, layer_biases in enumerate(clf_3_layers.intercepts_):
    print(f"  - Lớp {i+1}: {layer_biases.shape}")

2 LỚP ẨN 

Kích thước weights:
  - Lớp 1: (50, 100)
  - Lớp 2: (100, 50)
  - Lớp 3: (50, 1)

Kích thước biases:
  - Lớp 1: (100,)
  - Lớp 2: (50,)
  - Lớp 3: (1,)


THAM SỐ VỚI 3 LỚP ẨN 

Kích thước weights:
  - Lớp 1: (50, 100)
  - Lớp 2: (100, 50)
  - Lớp 3: (50, 25)
  - Lớp 4: (25, 1)

Kích thước biases:
  - Lớp 1: (100,)
  - Lớp 2: (50,)
  - Lớp 3: (25,)
  - Lớp 4: (1,)


# 2, 3 lớp với Dropout

In [45]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

X, y = make_classification(n_samples=1000, n_features=50, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print("2 lớp ẩn và Dropout")

model_2_layers = Sequential([
    Dense(100, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model_2_layers.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_2_layers.fit(X_train, y_train, epochs=80, batch_size=64, verbose=0)

y_pred_2 = (model_2_layers.predict(X_test) > 0.5).astype("int32")
accuracy_2 = accuracy_score(y_test, y_pred_2)

print(f"\nĐộ chính xác của mô hình 2 lớp ẩn với Dropout: {accuracy_2:.4f}")
print("--- Cấu trúc mô hình ---")
model_2_layers.summary()


print("\n" + "="*50 + "\n")

print("3 lớp ẩn và Dropout")

model_3_layers = Sequential([
    Dense(100, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.4),
    Dense(50, activation='relu'),
    Dropout(0.4),
    Dense(25, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

model_3_layers.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_3_layers.fit(X_train, y_train, epochs=80, batch_size=64, verbose=0)

y_pred_3 = (model_3_layers.predict(X_test) > 0.5).astype("int32")
accuracy_3 = accuracy_score(y_test, y_pred_3)

print(f"\nĐộ chính xác của mô hình 3 lớp ẩn với Dropout: {accuracy_3:.4f}")
print("--- Cấu trúc mô hình ---")
model_3_layers.summary()

2 lớp ẩn và Dropout
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 

Độ chính xác của mô hình 2 lớp ẩn với Dropout: 0.8440
--- Cấu trúc mô hình ---




3 lớp ẩn và Dropout
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 

Độ chính xác của mô hình 3 lớp ẩn với Dropout: 0.8600
--- Cấu trúc mô hình ---


# regularization L2

In [59]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

# Tạo dữ liệu
X, y = make_classification(n_samples=1000, n_features=50, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print("2 lớp ẩn và regularization L2 (tương tự Dropout)")

model_2_layers = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.01,  # L2 regularization
    max_iter=500,
    early_stopping=True,
    random_state=42
)

model_2_layers.fit(X_train, y_train)
y_pred_2 = model_2_layers.predict(X_test)
accuracy_2 = accuracy_score(y_test, y_pred_2)

print(f"\nĐộ chính xác của mô hình 2 lớp ẩn: {accuracy_2:.4f}")
print("\n--- Cấu trúc mô hình 2 lớp ---")
print(model_2_layers)

print("\n" + "="*50 + "\n")

print("3 lớp ẩn và regularization L2")

model_3_layers = MLPClassifier(
    hidden_layer_sizes=(100, 50, 25),
    activation='relu',
    solver='adam',
    alpha=0.01,
    batch_size=32,
    max_iter=500,
    early_stopping=True,
    random_state=42
)

model_3_layers.fit(X_train, y_train)
y_pred_3 = model_3_layers.predict(X_test)
accuracy_3 = accuracy_score(y_test, y_pred_3)

print(f"\nĐộ chính xác của mô hình 3 lớp ẩn: {accuracy_3:.4f}")
print("\n--- Cấu trúc mô hình 3 lớp ---")
print(model_3_layers)


2 lớp ẩn và regularization L2 (tương tự Dropout)

Độ chính xác của mô hình 2 lớp ẩn: 0.8560

--- Cấu trúc mô hình 2 lớp ---
MLPClassifier(alpha=0.01, early_stopping=True, hidden_layer_sizes=(100, 50),
              max_iter=500, random_state=42)


3 lớp ẩn và regularization L2

Độ chính xác của mô hình 3 lớp ẩn: 0.8520

--- Cấu trúc mô hình 3 lớp ---
MLPClassifier(alpha=0.01, batch_size=32, early_stopping=True,
              hidden_layer_sizes=(100, 50, 25), max_iter=500, random_state=42)
