# Практическая работа: Предсказание цены ноутбуков + FastAPI + Git

## Установка зависимостей

In [None]:
# Install the third-party packages used by the cells below (model training,
# the FastAPI service, and the ngrok tunnel). python-multipart is the package
# FastAPI relies on to parse uploaded files such as the one sent to /predict/.
!pip install xgboost scikit-learn joblib pandas numpy fastapi uvicorn pyngrok python-multipart

## Подключение Google Диска

In [None]:
from google.colab import drive

# Make Google Drive available under /content/drive; the dataset is read from
# and the trained model is written to paths below this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

## Загрузка данных, обучение модели и сохранение

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

# Read the laptop dataset from the mounted Google Drive folder.
file_path = '/content/drive/MyDrive/BobrovPR1/Laptop_price.csv'
df = pd.read_csv(file_path)

# 'Price' is the regression target; the remaining columns form the feature table.
y = df['Price']
X = df.drop(columns='Price')

# Keep 20% of the rows aside; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Columns handled by each preprocessing branch.
cat_features = ['Brand']
num_features = [
    'Screen_Size',
    'Weight',
    'Processor_Speed',
    'RAM_Size',
    'Storage_Capacity',
]

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fitting are ignored instead of raising.
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Numeric branch: fill gaps with the column median, then standardize.
num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

preprocessor = ColumnTransformer(transformers=[
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features),
])

# Preprocessing and the regressor are bundled into a single estimator so the
# saved artifact can be applied directly to raw feature tables.
regressor = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', regressor),
])

# Fit on the training split only, then persist the whole pipeline to Drive.
pipeline.fit(X_train, y_train)
joblib.dump(pipeline, '/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl')

## Создание FastAPI-приложения

In [None]:
%%writefile app.py
from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

# ASGI application object; the server is started against it as "app:app".
app = FastAPI()
# The serialized model is loaded once, when this module is imported, and kept
# in a module-level variable that the /predict/ endpoint reuses.
# NOTE(review): joblib.load unpickles the file, so this path must only ever
# point at a trusted artifact.
model_path = "/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    """Predict prices for every row of an uploaded CSV file.

    Args:
        file: Uploaded CSV. It is parsed with pandas and the resulting
            DataFrame is passed unchanged to the loaded model's ``predict``.

    Returns:
        A dict ``{"predictions": [...]}`` with the model output converted to
        a plain list.

    Raises:
        HTTPException: status 400 when the upload is empty, cannot be parsed
            as CSV, or is rejected by the model (e.g. the columns do not
            match what the model expects).
    """
    # Imported locally: the module header only pulls in FastAPI, File and
    # UploadFile from fastapi.
    from fastapi import HTTPException

    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        df = pd.read_csv(BytesIO(content))
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid CSV file: {exc}"
        ) from exc

    try:
        predictions = model.predict(df)
    except (KeyError, ValueError) as exc:
        # Data the model cannot consume is a client-side problem, so report it
        # as 400 instead of letting it surface as an unhandled 500.
        raise HTTPException(
            status_code=400, detail=f"Cannot predict on uploaded data: {exc}"
        ) from exc

    return {"predictions": predictions.tolist()}

## Генерация тестового CSV-файла

In [None]:
from pathlib import Path

import pandas as pd

# Two sample laptops; the values in each row follow the order of `columns`.
data = [
    [15.6, 2.2, 'Dell', 2.5, 8, 256],
    [17.3, 2.5, 'HP', 2.8, 16, 1024]
]
columns = ['Screen_Size', 'Weight', 'Brand', 'Processor_Speed', 'RAM_Size', 'Storage_Capacity']
df_test = pd.DataFrame(data, columns=columns)

# to_csv does not create missing directories and fails with OSError if the
# target folder is absent, so make sure it exists before writing.
output_path = Path('/mnt/data/test_input_final_clean.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_test.to_csv(output_path, index=False)

## Запуск сервера и подключение ngrok

In [None]:
# Start the API server in the background (nohup ... &) so the cell returns
# immediately; stdout and stderr are redirected to fastapi.log. The server
# listens on port 8000, which is the port the ngrok tunnel is opened for.
!nohup uvicorn app:app --host 0.0.0.0 --port 8000 --reload > fastapi.log 2>&1 &

In [None]:
from pyngrok import ngrok

# Open a public ngrok tunnel to the local port the uvicorn server listens on
# and show the resulting address.
api_port = 8000
public_url = ngrok.connect(api_port)
print("API доступно по адресу:", public_url)

## Версионирование проекта с помощью Git (по инструкции)

In [None]:
# Create a new Git repository in the current working directory.
!git init

In [None]:
# Stage the notebook(s), the API module, and any model / CSV files.
# NOTE(review): the globs are expanded in the current directory, while the
# model is saved under /content/drive/MyDrive/BobrovPR1 and the test CSV under
# /mnt/data — confirm those files are copied here first, otherwise the
# unmatched patterns will make `git add` fail.
!git add *.ipynb app.py *.pkl *.csv

In [None]:
# Record the staged files as the first commit.
# NOTE(review): presumably git user.name / user.email must be configured in
# this runtime beforehand — confirm.
!git commit -m "Первый коммит: ноутбук, модель и API"

In [None]:
# Register the GitHub repository as the remote named "origin".
!git remote add origin https://github.com/BobrovDE-UBVT2304/BobrovDE.git

In [None]:
# Rename the current branch to "main" and push it, setting origin/main as its
# upstream.
!git branch -M main
!git push -u origin main