# Настройка и первые шаги


In [1]:
%pip install xgboost scikit-learn joblib pandas numpy fastapi uvicorn pyngrok

Collecting xgboost
  Using cached xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting joblib
  Using cached joblib-1.5.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Using cached numpy-2.2.5-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting fastapi
  Using cached fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Using cached uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Using cached pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Collecting scipy (from xgboost)
  Using cached scipy-1.15.2-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none


[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

file_path = 'Laptop_price.csv' 
df = pd.read_csv(file_path)
X = df.drop(columns=['Price'])
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Вывод первых строк данных
df.head()

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,Asus,3.830296,16,512,11.185147,2.641094,17395.093065
1,Acer,2.912833,4,1000,11.311372,3.260012,31607.605919
2,Lenovo,3.241627,4,256,11.853023,2.029061,9291.023542
3,Acer,3.806248,16,512,12.28036,4.573865,17436.728334
4,Acer,3.268097,32,1000,14.990877,4.193472,32917.990718


In [7]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

num_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features)
])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5))
])

pipeline.fit(X_train, y_train)
joblib.dump(pipeline, 'laptop_price_model.pkl')



# Проверка модели
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Загрузка модели и проверка
loaded_model = joblib.load('laptop_price_model.pkl')
y_pred_loaded = loaded_model.predict(X_test)
print("Предсказания загруженной модели:", y_pred_loaded)

Mean Squared Error: 42580.64330033169
Предсказания загруженной модели: [10548.079  31427.553   9632.67    9237.194  31953.426  31646.098
 10553.304  16754.592  18470.611  32379.545   9984.597  10745.556
 32805.45    9098.023  17196.752   9118.695  32062.451  32225.709
 10239.446  31767.838  10555.461  10706.844   9144.038  10418.951
  9426.13   17504.607  10596.327  18269.44   10714.91   32575.307
 16636.7    32787.957  16652.057  31941.846  31740.14   10446.912
 31143.08    9128.383  32598.557  17304.166  32060.291   9598.175
 17628.68   31691.955  17339.125  16987.414  32676.58   16907.783
  9703.095  16994.861  16828.104  31483.943   9861.333  10501.711
 31963.008  32802.332  31379.807  31724.8     9114.882  32293.768
 17601.562  31440.377  31847.338  10707.018   9641.672  31855.707
 32659.092  32420.979  31519.15    9005.247  31679.18   32602.033
 17520.52   31663.432  17213.455  32929.688  18450.264   9129.256
 16823.361   9534.464   9480.509  32930.69   10546.793   9790.112
 1061

# GIT


#### Инициализация репозитория и отправка кода на GitHub

In [8]:
!git init
!git add .
!git commit -m "ML-pipline"
!git remote add origin https://github.com/Petr-Zaychenko/lab_1.git
!git push -u origin master

Initialized empty Git repository in D:/fs/lab_1_fs/.git/




[master (root-commit) 2ed5d40] ML-pipline
 3 files changed, 1397 insertions(+)
 create mode 100644 Laptop_price.csv
 create mode 100644 lab_1_fs.ipynb
 create mode 100644 laptop_price_model.pkl
branch 'master' set up to track 'origin/master'.


To https://github.com/Petr-Zaychenko/lab_1.git
 * [new branch]      master -> master


# FASTAPI

In [9]:
%%writefile app.py
from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

app = FastAPI()

# Загрузка обученной модели
model_path = "laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}


Writing app.py


In [17]:
# Установка python-multipart для корректной работы FastAPI
%pip install python-multipart

# Запуск FastAPI
!uvicorn app:app --reload

# Подключение ngrok
from pyngrok import ngrok

# Укажите ваш токен ngrok
ngrok.set_auth_token("2wd5cydMgehF5yut5Zvk1JmccHi_4NcqEwFGqnpbeYm2H6YuC")

# Создание туннеля для порта 8000
public_url = ngrok.connect(8000)
print("API доступно по адресу:", public_url)

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


^C
API доступно по адресу: NgrokTunnel: "https://bad2-193-187-173-210.ngrok-free.app" -> "http://localhost:8000"


curl -X 'POST' \
  'http://localhost:8000/predict/' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'file=@Laptop_price.csv;type=text/csv'

 content-length: 15043 
 content-type: application/json 
 date: Sun,04 May 2025 16:13:00 GMT 
 server: uvicorn 