# Cheat sheet

## Основни концепции

### Train/Test Split
Разделяне на данните на тренировъчен и тестов набор:

In [2]:
from sklearn.model_selection import train_test_split

# X holds the features and y the target variable; 20% of the rows are held
# out as the test set, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Cross-Validation
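Метод за оценка на модела чрез разделяне на данните на $k$ части (folds): моделът се обучава върху $k-1$ от тях и се оценява върху останалата, като това се повтаря $k$ пъти: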

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Evaluate a random forest with 5-fold cross-validation: cross_val_score
# returns one score per fold, and the mean of those scores is printed.
model = RandomForestClassifier(random_state=42)
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
print("Средна точност:", scores.mean())


### Gradient Descent
Итеративен метод за минимизиране на функция на загуба:

In [None]:
# Пример с ръчно изчисление на градиентен спуск
import numpy as np

def _residuals(theta, X, y):
    """Return predictions minus targets for the linear model ``X.dot(theta)``.

    When ``y`` has the same number of elements as the predictions but a
    different shape (e.g. a column vector ``(n, 1)`` against predictions of
    shape ``(n,)``), a plain subtraction would silently broadcast to an
    ``(n, n)`` matrix. In that case ``y`` is reshaped to the shape of the
    predictions first; in every other case the inputs are used unchanged.
    """
    predictions = X.dot(theta)
    shapes_differ = np.shape(predictions) != np.shape(y)
    if shapes_differ and np.size(predictions) == np.size(y):
        y = np.reshape(np.asarray(y), np.shape(predictions))
    return predictions - y


# Loss function (simple quadratic)
def loss_function(theta, X, y):
    """Return the mean squared error of the linear model ``X.dot(theta)``.

    Args:
        theta: Parameter vector of the linear model.
        X: Feature matrix with one row per sample.
        y: Target values, one per sample (flat or column-shaped).

    Returns:
        The mean of the squared residuals.
    """
    return np.mean(_residuals(theta, X, y) ** 2)


# Gradient
def gradient(theta, X, y):
    """Return the gradient of ``loss_function`` with respect to ``theta``.

    Args:
        theta: Parameter vector of the linear model.
        X: Feature matrix with one row per sample.
        y: Target values, one per sample (flat or column-shaped).

    Returns:
        ``2 * X.T.dot(residuals) / len(y)``, which has the same shape as
        ``theta`` so it can be used directly in an in-place update.
    """
    return 2 * X.T.dot(_residuals(theta, X, y)) / len(y)

# Initialisation: the starting parameters come from a seeded generator so the
# run is reproducible, in line with the random_state=42 used in the other
# examples of this cheat sheet.
theta = np.random.default_rng(42).standard_normal(X.shape[1])  # one value per feature column
learning_rate = 0.01
iterations = 100

for i in range(iterations):
    # Step against the gradient, then report the loss after the update.
    theta -= learning_rate * gradient(theta, X, y)
    print(f"Итерация {i+1}, загуба: {loss_function(theta, X, y)}")

### Regularization
Регуларизацията предпазва от overfitting:

* L1 (Lasso): Прибавя към функцията на загуба сумата от абсолютните стойности на коефициентите.
* L2 (Ridge): Прибавя към функцията на загуба сумата от квадратите на коефициентите.

In [None]:
from sklearn.linear_model import Ridge, Lasso

# Ridge regression; alpha controls the strength of the regularization.
# fit() returns the estimator itself, so creation and training are chained.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# Lasso regression, trained on the same data with alpha=0.1.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)


## Метрики за оценка

**За регресия:**
### Mean Absolute Error (MAE):

In [None]:
from sklearn.metrics import mean_absolute_error
# NOTE(review): y_pred is not defined in this cell — presumably the
# predictions of a fitted regressor for X_test; confirm before running.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("MAE:", mae)


### Mean Squared Error (MSE):

In [None]:
from sklearn.metrics import mean_squared_error
# NOTE(review): y_pred is not defined in this cell — presumably the
# predictions of a fitted regressor for X_test; confirm before running.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE:", mse)

### R^2 Score:

In [None]:
from sklearn.metrics import r2_score
# NOTE(review): y_pred is not defined in this cell — presumably the
# predictions of a fitted regressor for X_test; confirm before running.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R^2 Score:", r2)

**За класификация:**
### Accuracy:

In [None]:
from sklearn.metrics import accuracy_score
# NOTE(review): y_pred is not defined in this cell — presumably the class
# labels predicted by a fitted classifier for X_test; confirm before running.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Точност (Accuracy):", accuracy)

### Precision, Recall, F1-Score:

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# All three metrics are computed with the same 'weighted' averaging, in the
# order precision, recall, F1.
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)


### Confusion Matrix:

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Build the confusion matrix and draw it as an annotated heatmap with integer
# cell labels; the axis labels are set on the Axes returned by the heatmap.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues').set(
    xlabel='Предсказани',
    ylabel='Истински',
)
plt.show()


### ROC-AUC (за бинарна класификация):


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# NOTE(review): assumes `model` is an already fitted binary classifier; no
# model.fit(...) call is visible in this file — confirm before running.
y_proba = model.predict_proba(X_test)[:, 1]  # second column of predict_proba
roc_auc = roc_auc_score(y_test, y_proba)
print("ROC-AUC:", roc_auc)

# Plot the ROC curve, with the score shown in the legend.
fpr, tpr, _ = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr, label=f"ROC-AUC = {roc_auc:.2f}")
plt.plot([0, 1], [0, 1], 'k--')  # dashed diagonal from (0, 0) to (1, 1)
plt.gca().set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve",
)
plt.legend()
plt.show()
