## Step 1: 下載Iris Dataset

In [1]:
!wget https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv

--2023-09-02 00:41:15--  https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3975 (3.9K) [text/plain]
Saving to: ‘iris.csv’


2023-09-02 00:41:15 (77.4 MB/s) - ‘iris.csv’ saved [3975/3975]



## Step 2: Data ETL Pipeline

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split


class DataProcessor:
    def __init__(self, input_file):
        """
        Initialize the DataProcessor class.

        :param input_file: str, the path of the input csv file.
        """
        self.input_file = input_file
        self.data = None
        self.train_data = None
        self.test_data = None

    def extract_data(self):
        """
        Extract data from csv file.

        :return: None
        """
        self.data = pd.read_csv(self.input_file)

    def transform_data(self):
        """
        Transform the data by:
        1. Filling NA values with the mean of the column.
        2. Splitting the data into training and testing sets.

        :return: None
        """
        # Fill NA values with mean of the column
        self.data.fillna(self.data.mean(), inplace=True)

        # Split the data into training and testing sets
        self.train_data, self.test_data = train_test_split(self.data, test_size=0.2)

    def load_data(self, train_output_file, test_output_file):
        """
        Save the transformed data to csv files.

        :param train_output_file: str, the path of the training output csv file.
        :param test_output_file: str, the path of the testing output csv file.
        :return: None
        """
        self.train_data.to_csv(train_output_file, index=False)
        self.test_data.to_csv(test_output_file, index=False)


In [3]:
input_file = 'iris.csv'
train_output_file = 'train_output.csv'
test_output_file = 'test_output.csv'
data_processor = DataProcessor(input_file)
data_processor.extract_data()
data_processor.transform_data()
data_processor.load_data(train_output_file, test_output_file)

  self.data.fillna(self.data.mean(), inplace=True)


## Step 3: Model Training, Evaluation Pipeline

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

class Model:
    def __init__(self, model_file='model.pkl'):
        """
        Initialize the model.

        Parameters:
        model_file (str): The file to save/load the model.
        """
        self.model_file = model_file
        self.model = None

    def train(self, train_data_file='train_data.csv'):
        """
        Train the model using the data in the train_data_file.

        Parameters:
        train_data_file (str): The file containing the training data.

        Returns:
        None
        """
        # Load the training data
        data = pd.read_csv(train_data_file)
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]

        # Split the data into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train the model
        self.model = LogisticRegression(max_iter=1000)
        self.model.fit(X_train, y_train)

        # Save the model
        with open(self.model_file, 'wb') as f:
            pickle.dump(self.model, f)

        # Evaluate the model on the validation set
        y_pred = self.model.predict(X_val)
        accuracy = accuracy_score(y_val, y_pred)
        print('Validation Accuracy:', accuracy)

    def evaluate(self, test_data_file='test_data.csv'):
        """
        Evaluate the model using the data in the test_data_file.

        Parameters:
        test_data_file (str): The file containing the test data.

        Returns:
        float: The accuracy of the model on the test data.
        """
        # Load the test data
        data = pd.read_csv(test_data_file)
        X_test = data.iloc[:, :-1]
        y_test = data.iloc[:, -1]

        # Load the model
        with open(self.model_file, 'rb') as f:
            self.model = pickle.load(f)

        # Evaluate the model on the test set
        y_pred = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        return accuracy

    def predict(self, X):
        """
        Predict the target values for the given features X.

        Parameters:
        X (array-like): The features to predict the target values.

        Returns:
        array: The predicted target values.
        """
        # Load the model
        with open(self.model_file, 'rb') as f:
            self.model = pickle.load(f)

        # Predict the target values
        y_pred = self.model.predict(X)
        return y_pred

In [5]:
# 創建Model類的一個實例
model = Model()

# 使用訓練數據訓練模型
model.train('train_output.csv')

# 使用測試數據評估模型
accuracy = model.evaluate('test_output.csv')
print('Test Accuracy:', accuracy)

# 預測新數據的目標值
import numpy as np
X_new = np.array([[5.1, 3.5, 1.4, 0.2], [6.7, 3.0, 5.2, 2.3]])
y_pred = model.predict(X_new)
print('Predicted Target Values:', y_pred)


Validation Accuracy: 0.9583333333333334
Test Accuracy: 0.9666666666666667
Predicted Target Values: ['Setosa' 'Virginica']




## Step 4: Model Deployment Pipeline

In [None]:
!pip install -q fastapi nest-asyncio pyngrok uvicorn

In [7]:
import pickle
from fastapi import FastAPI, Request
from pydantic import BaseModel

# 初始化 FastAPI 應用
app = FastAPI()

# 定義請求的資料模型
class Item(BaseModel):
    data: list

# 載入模型
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)

# 定義路由
@app.post('/predict')
def predict(item: Item):
    data = item.data
    prediction = model.predict([data])
    return {'prediction': prediction[0]}

In [None]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn

ngrok.set_auth_token("1abnk57uHxcXm5RHvDPTcR3Vqbx_3jr5ZKL2gm41tBGBJCvYB")

ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
nest_asyncio.apply()
uvicorn.run(app, port=8000)



INFO:     Started server process [813]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


Public URL: https://dc8c-104-196-132-242.ngrok-free.app
INFO:     112.104.26.9:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     112.104.26.9:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     112.104.26.9:0 - "POST / HTTP/1.1" 404 Not Found
INFO:     112.104.26.9:0 - "POST /predict HTTP/1.1" 200 OK


