##Практическая работа 1
#Создание ML-пайплайна, Git и FastAPI

Цель работы:

1) Автоматизировать предобработку данных и обучение модели с помощью пайплайна

2) Использовать Git для версионирования кода

3) Развернуть FastAPI-сервис для получения предсказаний

## Установка необходимых библиотек

In [None]:
!pip install xgboost scikit-learn joblib pandas numpy fastapi uvicorn pyngrok python-multipart

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.7-py3-none-any.whl.metadata (9.4 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.2-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.7-py3-none-any.whl (23 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Downloading starlette-0.46.2-py3-none-any.whl (72 kB)
[2K   [90m

## Подключение Google Диска

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Загрузка данных, обучение модели и сохранение

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

file_path = '/content/drive/MyDrive/BobrovPR1/Laptop_price.csv'
df = pd.read_csv(file_path)

X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

num_features = ['Screen_Size', 'Weight', 'Processor_Speed', 'RAM_Size', 'Storage_Capacity']
cat_features = ['Brand']

num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features)
])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5))
])

pipeline.fit(X_train, y_train)
joblib.dump(pipeline, '/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl')

['/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl']

## Версионирование проекта с Git


In [None]:
!git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [None]:
!git config --global user.email "darkfire806@gmail.com"
!git config --global user.name "BobrovDE-UBVT2304"
!git add *.

error: open("drive/MyDrive/Новый документ.gdoc"): Operation not supported
error: unable to index file 'drive/MyDrive/Новый документ.gdoc'
fatal: adding files failed


In [None]:
!git commit -m "Добавлен ML-пайплайн"

[main 2173d47] Добавлен ML-пайплайн
 1 file changed, 1 insertion(+), 1 deletion(-)


In [None]:
from getpass import getpass
import os

token = getpass('Вставьте сюда свой GitHub токен: ')
os.environ['GIT_TOKEN'] = token

!git remote add origin https://BobrovDE:$GIT_TOKEN@github.com/BobrovDE-UBVT2304/BobrovFinal.git

Вставьте сюда свой GitHub токен: ··········


In [None]:
!git branch -M main
!git push -u origin main

Enumerating objects: 106, done.
Counting objects:   0% (1/106)Counting objects:   1% (2/106)Counting objects:   2% (3/106)Counting objects:   3% (4/106)Counting objects:   4% (5/106)Counting objects:   5% (6/106)Counting objects:   6% (7/106)Counting objects:   7% (8/106)Counting objects:   8% (9/106)Counting objects:   9% (10/106)Counting objects:  10% (11/106)Counting objects:  11% (12/106)Counting objects:  12% (13/106)Counting objects:  13% (14/106)Counting objects:  14% (15/106)Counting objects:  15% (16/106)Counting objects:  16% (17/106)Counting objects:  17% (19/106)Counting objects:  18% (20/106)Counting objects:  19% (21/106)Counting objects:  20% (22/106)Counting objects:  21% (23/106)Counting objects:  22% (24/106)Counting objects:  23% (25/106)Counting objects:  24% (26/106)Counting objects:  25% (27/106)Counting objects:  26% (28/106)Counting objects:  27% (29/106)Counting objects:  28% (30/106)Counting objects:  29% (31/106)Counting objects:

## Создание FastAPI приложения

In [None]:
%%writefile app.py
from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

app = FastAPI()
model_path = "/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}

Writing app.py


## Запуск сервера и подключение ngrok

In [22]:
!ngrok config add-authtoken 2wojUOiB64LqXv1zEjySINqtbCa_6z5N7VWdxxaZmddzjsdzb

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [23]:
!nohup uvicorn app:app --host 0.0.0.0 --port 8000 --reload > fastapi.log 2>&1 &

In [24]:
from pyngrok import ngrok
public_url = ngrok.connect(8000)
print("API доступно по адресу:", public_url)

API доступно по адресу: NgrokTunnel: "https://3fcd-35-201-213-234.ngrok-free.app" -> "http://localhost:8000"
