# Welcome to Yandex ML Handbook Tasks

This notebook demonstrates that the data science environment is set up correctly.

In [None]:
# Import essential data science libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Reaching this point means every import above resolved; report the versions
# of the core libraries so environment mismatches are easy to spot.
print("✅ All libraries imported successfully!")
version_lines = (
    f"Pandas version: {pd.__version__}",
    f"NumPy version: {np.__version__}",
    # The matplotlib package is reached through the pyplot alias here.
    f"Matplotlib version: {plt.matplotlib.__version__}",
)
for version_line in version_lines:
    print(version_line)

In [None]:
# Generate a reproducible synthetic classification dataset:
# 1000 samples with 20 features (15 informative + 5 redundant).
dataset_options = dict(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)
X, y = make_classification(**dataset_options)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical between runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")

In [None]:
# Build a 100-tree random forest with a fixed seed and train it on the
# training split (fit() hands back the fitted estimator, so the two steps
# can be chained).
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = rf.predict(X_test)

# Summarize the predictions against the true test labels.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Create a simple visualization with two side-by-side panels:
#   left  - importance of each feature according to the trained forest
#   right - histogram of the first feature in the training split
feature_importance = rf.feature_importances_
features = [f'Feature_{i}' for i in range(len(feature_importance))]
positions = list(range(len(features)))

plt.figure(figsize=(10, 6))

# Feature importance plot
plt.subplot(1, 2, 1)
plt.bar(positions, feature_importance)
# Label every bar with its feature name (previously the names were built but
# never shown); rotate the labels so they do not overlap.
plt.xticks(positions, features, rotation=90)
plt.title('Feature Importance')
plt.xlabel('Features')
plt.ylabel('Importance')

# Distribution plot
plt.subplot(1, 2, 2)
plt.hist(X_train[:, 0], alpha=0.7, label='Feature 0', bins=30)
plt.title('Feature Distribution')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()

# tight_layout keeps the rotated tick labels and axis titles from colliding.
plt.tight_layout()
plt.show()

print("🎉 Environment setup is complete and working!")