In [9]:
import os 
import sqlite3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the iris CSV from the sibling data directory and preview the first rows.
data_path = os.path.join('..', 'data', 'iris_data.csv')
df = pd.read_csv(data_path)
print(df.head())

# Split the frame into the label column and the remaining feature columns.
y = df['target']
X = df.drop(columns='target')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [3]:
# Build a 100-tree random forest (seeded) and fit it on the training split;
# fit() returns the estimator itself, so construction and fitting can be chained.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and compute the evaluation metrics.
y_pred = rf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred)
report_rf = classification_report(y_test, y_pred)

# Report overall accuracy followed by the per-class breakdown.
print("Accuracy (Random Forest):", acc_rf)
print("\nClassification Report:\n", report_rf)

Accuracy (Random Forest): 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [5]:
# Collect the feature column labels and the importance scores from the fitted forest.
feature_names = X.columns
importance = rf.feature_importances_

# Assemble a two-column table, then order it from highest to lowest importance.
df_importances = pd.DataFrame({'feature': feature_names, 'importance': importance})
df_importances = df_importances.sort_values(by='importance', ascending=False)
print("\nFeature Importance:\n", df_importances)


Feature Importance:
              feature  importance
3   petal width (cm)    0.433982
2  petal length (cm)    0.417308
0  sepal length (cm)    0.104105
1   sepal width (cm)    0.044605


In [None]:
# Write the feature-importance table to a CSV file in the data directory,
# leaving the DataFrame index out of the file.
save_path = os.path.join('..', 'data', 'feature_importances.csv')
df_importances.to_csv(path_or_buf=save_path, index=False)

# Confirm where the file was written (the message text is in Russian).
print(f"✅ Файл сохранён по пути: {save_path}")

✅ Файл сохранён по пути: ../data/feature_importances.csv


In [10]:
# Paths to the SQLite database file and to the exported feature-importance CSV.
db_path = os.path.join('..', 'data', 'iris_data.db')
csv_path = os.path.join('..', 'data', 'feature_importances.csv')

# Load the feature-importance table from the CSV.
df_importances = pd.read_csv(csv_path)

# Connect to the database. The connection is closed in a `finally` clause so it is
# released even when the write or the verification query raises; otherwise a failed
# cell would leave the connection open in the running kernel.
conn = sqlite3.connect(db_path)
try:
    # Store the table in the database, replacing any existing table with this name.
    df_importances.to_sql('feature_importances', conn, if_exists='replace', index=False)

    # Read the rows back to check that everything was written.
    print(pd.read_sql_query("SELECT * FROM feature_importances;", conn))
finally:
    # Close the connection regardless of success or failure.
    conn.close()

             feature  importance
0   petal width (cm)    0.433982
1  petal length (cm)    0.417308
2  sepal length (cm)    0.104105
3   sepal width (cm)    0.044605
