In [1]:
#установим необходимые библиотеки
!pip install xgboost scikit-learn joblib pandas numpy fastapi uvicorn pyngrok python-multipart

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.2-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.5-py3-none-any.whl (23 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Downloading starlette-0.46.2-py3-none-any.whl (72 kB)
[2K   [90m

In [64]:
# Подключение Google Drive
from google.colab import drive
drive.mount('/content/drive')
%cd "/content/drive/MyDrive/Учёба_МТУСИ/ColabNotebooks/2 курс"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Учёба_МТУСИ/ColabNotebooks/2 курс


Создадим ML-Pipeline

In [65]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

In [66]:
# загрузка данных
file_path = 'Laptop_price.csv'
df = pd.read_csv(file_path)

In [67]:
# разделение данных
X = df.drop(columns=['Price'])
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [68]:
# Определение числовых и категориальных признаков
num_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()


In [69]:
# Создание трансформеров
num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features)
])

In [70]:
# Сборка пайплайна
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5))
])

In [71]:
# Обучение модели
pipeline.fit(X_train, y_train)

In [72]:
# Сохранение модели
joblib.dump(pipeline, 'Laptop_price_model.pkl')


['Laptop_price_model.pkl']

Версионирование с GIT

In [108]:
!git init
!git add .

Reinitialized existing Git repository in /content/drive/MyDrive/Учёба_МТУСИ/ColabNotebooks/2 курс/.git/


In [111]:
!git commit -m "Добавлен ML-пайплайн"

On branch master
nothing to commit, working tree clean
main	https://Dante42a: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@github.com/Dante42a/Pepeline_Labs.git (fetch)
main	https://Dante42a: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@github.com/Dante42a/Pepeline_Labs.git (push)


In [134]:
!git remote set-url main https://Dante42a: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@github.com/Dante42a/Pepeline_Labs.git
!git push -u main master

Branch 'master' set up to track remote branch 'master' from 'main'.
Everything up-to-date


Развернём FastAPI

In [135]:
#создадим файл app.py
%%writefile app.py
from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

# загрузим модель
app = FastAPI()
model_path = "Laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}

Overwriting app.py


In [136]:
!pip install pyngrok
!ngrok config add-authtoken 2wJakOTqxYvGkr7GFTsCIG7NrKU_5p1489azftJKR1iibYSY3

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [137]:
# Запуск сервера в фоне
!nohup uvicorn app:app --host 0.0.0.0 --port 8000 --reload > fastapi.log 2>&1 &
# Получение публичного URL
from pyngrok import ngrok
public_url = ngrok.connect(8000)
print("API доступно по адресу:", public_url)

API доступно по адресу: NgrokTunnel: "https://d698-34-55-165-128.ngrok-free.app" -> "http://localhost:8000"


Тестирование доступа к апи

In [139]:
import requests
import pandas as pd

# Создадим тестовые данные в формате CSV
test_data = pd.DataFrame([{
    "Company": "Dell",
    "TypeName": "Ultrabook",
    "Inches": 13.3,
    "ScreenResolution": "Full HD 1920x1080",
    "Cpu": "Intel Core i5 2.3GHz",
    "Ram": "8GB",
    "Memory": "256GB SSD",
    "Gpu": "Intel Iris Plus Graphics 640",
    "OpSys": "Windows 10",
    "Weight": "1.2kg"
}])

# Сохраним в CSV
test_data.to_csv("test.csv", index=False)

# Отправка запроса
with open("test.csv", "rb") as f:
    response = requests.post(
        "https://d698-34-55-165-128.ngrok-free.app",
        files={"file": f}
    )
print(response.json())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)