# Часть 1: Установка зависимостей

In [None]:
!pip install xgboost scikit-learn joblib pandas numpy fastapi uvicorn pyngrok python-multipart

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.7-py3-none-any.whl.metadata (9.4 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.2-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.7-py3-none-any.whl (23 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Downloading starlette-0.46.2-py3-none-any.whl (72 kB)
[2K   [90m

# Часть 1.1: Подключение Google Диска

In [None]:
# Attach Google Drive to the Colab VM. Later cells read the dataset from and
# write the trained model to paths under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# Часть 2: Обработка CSV и обучение модели

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

# Train a laptop-price regressor on the CSV stored in Drive and persist the
# fitted preprocessing + model pipeline next to it.
project_dir = '/content/drive/MyDrive/BobrovPR1'
file_path = project_dir + '/Laptop_price.csv'
model_output_path = project_dir + '/laptop_price_model.pkl'

df = pd.read_csv(file_path)

num_features = ['Screen_Size', 'Weight', 'Processor_Speed', 'RAM_Size', 'Storage_Capacity']
cat_features = ['Brand']

# Fail fast with a readable message if the CSV does not have the columns the
# preprocessing below selects by name.
missing_columns = [
    col for col in ['Price', *num_features, *cat_features] if col not in df.columns
]
if missing_columns:
    raise KeyError(f'{file_path} is missing required columns: {missing_columns}')

X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Numeric columns: fill gaps with the median, then standardize.
num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at predict time.
cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features)
])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5))
])

pipeline.fit(X_train, y_train)

# Score the model on the held-out split before saving it. The split was
# previously created but never used, so the saved model was never validated.
test_r2 = pipeline.score(X_test, y_test)
print(f'Hold-out R^2: {test_r2:.4f}')

# Kept as the last expression so the cell still displays the written path.
joblib.dump(pipeline, model_output_path)

['/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl']

# Часть 3: FastAPI-приложение

In [None]:
%%writefile app.py
from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

app = FastAPI()
model_path = "/content/drive/MyDrive/BobrovPR1/laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}

Writing app.py


# Часть 4: Версионирование с Git

In [97]:
# These commands should be run locally or from a terminal in Colab.
# NOTE(review): the recorded output shows the repository lives in /content,
# which also contains the mounted Drive — confirm this is the intended root.
!git init

Reinitialized existing Git repository in /content/.git/


In [98]:
!git add .

error: open("drive/MyDrive/Новый документ.gdoc"): Operation not supported
error: unable to index file 'drive/MyDrive/Новый документ.gdoc'
fatal: adding files failed


In [99]:
!git commit -m "Добвлен ML-пайплайн"

On branch main
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   drive/MyDrive/Colab Notebooks/PR1 BOBROV D.E UBVT2304.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.config/[m
	[31m"drive/MyDrive/\320\222\320\270\320\264\320\265\320\276/"[m
	[31m"drive/MyDrive/\320\227\320\260\320\272\320\260\321\202\321\213/"[m
	[31m"drive/MyDrive/\320\235\320\276\320\262\321\213\320\270\314\206 \320\264\320\276\320\272\321\203\320\274\320\265\320\275\321\202.gdoc"[m
	[31m"drive/MyDrive/\320\236\321\202\320\264\321\213\321\205/"[m
	[31m"drive/MyDrive/\320\237\320\224\320\224/"[m
	[31m"drive/MyDrive/\320\237\320\260\321\200\320\276\320\273\320\270/"[m
	[31m"drive/MyDrive/\320\244\320\276\321\202\320\272\320\270 \320\272\320\260\320\272\320\270\320\265-\321\202\320\276 /"[m
	[31msample_data/README.md[m
	[

In [92]:
!git remote add origin https://github.com/BobrovDE-UBVT2304/BobrovDE.git

error: remote origin already exists.


In [93]:
!git push -u origin main

To https://github.com/BobrovDE-UBVT2304/BobrovDE.git
 [31m! [rejected]       [m main -> main (non-fast-forward)
[31merror: failed to push some refs to 'https://github.com/BobrovDE-UBVT2304/BobrovDE.git'
[m[33mhint: Updates were rejected because the tip of your current branch is behind[m
[33mhint: its remote counterpart. Integrate the remote changes (e.g.[m
[33mhint: 'git pull ...') before pushing again.[m
[33mhint: See the 'Note about fast-forwards' in 'git push --help' for details.[m


In [86]:
# Подтягиваем изменения с GitHub, если есть конфликты
!git pull origin main --allow-unrelated-histories

# Пушим наш код в GitHub
!git push -u origin main

From https://github.com/BobrovDE-UBVT2304/BobrovDE
 * branch            main       -> FETCH_HEAD
[33mhint: You have divergent branches and need to specify how to reconcile them.[m
[33mhint: You can do so by running one of the following commands sometime before[m
[33mhint: your next pull:[m
[33mhint: [m
[33mhint:   git config pull.rebase false  # merge (the default strategy)[m
[33mhint:   git config pull.rebase true   # rebase[m
[33mhint:   git config pull.ff only       # fast-forward only[m
[33mhint: [m
[33mhint: You can replace "git config" with "git config --global" to set a default[m
[33mhint: preference for all repositories. You can also pass --rebase, --no-rebase,[m
[33mhint: or --ff-only on the command line to override the configured default per[m
[33mhint: invocation.[m
fatal: Need to specify how to reconcile divergent branches.
To https://github.com/BobrovDE-UBVT2304/BobrovDE.git
 [31m! [rejected]       [m main -> main (non-fast-forward)
[31merror: fa

In [87]:
from getpass import getpass
import os

token = getpass('Вставьте сюда свой GitHub токен: ')
os.environ['GIT_TOKEN'] = token
# Удалим старый origin, если он уже есть
!git remote remove origin

# Добавим новый origin с токеном
!git remote add origin https://BobrovDE:$GIT_TOKEN@github.com/BobrovDE-UBVT2304/BobrovDE.git


Вставьте сюда свой GitHub токен: ··········


In [88]:
!git pull origin main --allow-unrelated-histories

From https://github.com/BobrovDE-UBVT2304/BobrovDE
 * branch            main       -> FETCH_HEAD
 * [new branch]      main       -> origin/main
[33mhint: You have divergent branches and need to specify how to reconcile them.[m
[33mhint: You can do so by running one of the following commands sometime before[m
[33mhint: your next pull:[m
[33mhint: [m
[33mhint:   git config pull.rebase false  # merge (the default strategy)[m
[33mhint:   git config pull.rebase true   # rebase[m
[33mhint:   git config pull.ff only       # fast-forward only[m
[33mhint: [m
[33mhint: You can replace "git config" with "git config --global" to set a default[m
[33mhint: preference for all repositories. You can also pass --rebase, --no-rebase,[m
[33mhint: or --ff-only on the command line to override the configured default per[m
[33mhint: invocation.[m
fatal: Need to specify how to reconcile divergent branches.


# Часть 5: Запуск API и тестирование

In [None]:
# Start the FastAPI app (app.py) on port 8000 in the background; stdout and
# stderr are redirected to fastapi.log so the cell returns immediately.
# NOTE(review): --reload is normally a development option — confirm it is
# wanted here.
!nohup uvicorn app:app --host 0.0.0.0 --port 8000 --reload > fastapi.log 2>&1 &

In [None]:
from pyngrok import ngrok

# Open an ngrok tunnel to local port 8000 (the port uvicorn listens on) and
# print where the API can be reached from outside the Colab VM.
public_url = ngrok.connect(addr=8000)
print('API доступно по адресу:', public_url)

In [None]:
from pathlib import Path

import pandas as pd

# Two sample laptops described with the feature columns the model expects.
data = [
    [15.6, 2.2, 'Dell', 2.5, 8, 256],
    [17.3, 2.5, 'HP', 2.8, 16, 1024]
]
columns = ['Screen_Size', 'Weight', 'Brand', 'Processor_Speed', 'RAM_Size', 'Storage_Capacity']
df_test = pd.DataFrame(data, columns=columns)

# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before writing (it is absent on a fresh VM).
test_csv_path = Path('/mnt/data/test_input_final_clean.csv')
test_csv_path.parent.mkdir(parents=True, exist_ok=True)
df_test.to_csv(test_csv_path, index=False)

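Откройте в браузере `<public_url>/docs` (адрес напечатан ячейкой с ngrok выше), в форме эндпоинта `POST /predict/` загрузите файл `test_input_final_clean.csv` (он сохранён в `/mnt/data/`) и получите предсказания.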