In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sqlalchemy import create_engine
import logging
from typing_extensions import Annotated
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
def query_database(table_name: str, limit_num: int) -> pd.DataFrame:
    """Fetch up to ``limit_num`` rows per gesture label from ``table_name``.

    One query is issued per label ('Circle', 'Side-to-Side', 'Square',
    'negative') and the non-empty results are concatenated in that order.

    Args:
        table_name: Plain SQL identifier, optionally schema-qualified
            (``schema.table``). Anything else is rejected, because an
            identifier cannot be sent as a bound parameter.
        limit_num: Maximum number of rows to fetch per label (>= 0).

    Returns:
        A single DataFrame with a fresh ``RangeIndex`` holding the rows of
        every label that returned data.

    Raises:
        ValueError: If ``table_name`` is not a plain identifier, if
            ``limit_num`` is negative, or if no label returned any row.
    """
    import os
    import re

    # Validate before touching the database: the table name is the only piece
    # that still has to be interpolated into the SQL text.
    identifier = r"[A-Za-z_][A-Za-z0-9_]*"
    if not isinstance(table_name, str) or not re.fullmatch(
        rf"{identifier}(?:\.{identifier})?", table_name
    ):
        raise ValueError(f"Invalid table name: {table_name!r}")
    limit_num = int(limit_num)
    if limit_num < 0:
        raise ValueError(f"limit_num must be non-negative, got {limit_num}")

    from sqlalchemy import text

    # The connection string can be overridden through the environment so that
    # credentials do not have to live in the notebook; the historical value is
    # kept as the fallback so existing runs behave the same.
    connection_string = os.environ.get(
        'GESTURE_DB_URL',
        'postgresql://airflow:airflow@es.aidery.io:5433/postgres',
    )
    engine = create_engine(connection_string)

    labels = ['Circle', 'Side-to-Side', 'Square', 'negative']
    # Label and limit travel as bound parameters instead of being formatted
    # into the statement.
    query = text(
        f"SELECT * FROM {table_name} WHERE gesture = :label LIMIT :limit_num"
    )

    data_frames = []
    try:
        # A single connection is reused for all labels.
        with engine.connect() as connection:
            for label in labels:
                label_data = pd.read_sql_query(
                    query,
                    connection,
                    params={'label': label, 'limit_num': limit_num},
                )
                if not label_data.empty:
                    data_frames.append(label_data)
                else:
                    print(f"No data found for label: {label}")
    finally:
        # Release the pooled connections held by the engine.
        engine.dispose()

    if not data_frames:
        # pd.concat([]) would raise a far less helpful ValueError.
        raise ValueError(f"No rows found in {table_name!r} for labels {labels}")

    return pd.concat(data_frames, ignore_index=True)

In [4]:
# Pull 2500 rows per gesture label and summarise the resulting frame.
df = query_database(table_name='infineon_gesture', limit_num=2500)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   index    10000 non-null  int64  
 1   accx     10000 non-null  float64
 2   accy     10000 non-null  float64
 3   accz     10000 non-null  float64
 4   gyrox    10000 non-null  float64
 5   gyroy    10000 non-null  float64
 6   gyroz    10000 non-null  float64
 7   gesture  10000 non-null  object 
dtypes: float64(6), int64(1), object(1)
memory usage: 625.1+ KB


In [5]:
# Remove the 'index' column returned by the query. errors='ignore' keeps the
# cell re-runnable: a second execution no longer fails with KeyError once the
# column is already gone.
df = df.drop(columns=['index'], errors='ignore')
# info() writes its report itself and returns None, so it is not wrapped in
# print() (which appended a stray "None" line to the output).
df.info()
print(df.gesture.value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   accx     10000 non-null  float64
 1   accy     10000 non-null  float64
 2   accz     10000 non-null  float64
 3   gyrox    10000 non-null  float64
 4   gyroy    10000 non-null  float64
 5   gyroz    10000 non-null  float64
 6   gesture  10000 non-null  object 
dtypes: float64(6), object(1)
memory usage: 547.0+ KB
None
Circle          2500
Side-to-Side    2500
Square          2500
negative        2500
Name: gesture, dtype: int64


In [30]:
# Separate the target column from the sensor features.
y = df['gesture']
X = df.drop(columns=['gesture'])

# Shuffled 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# Shapes of each part, a separator line, then the class balance of both sides.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')
print('\n', '*' * 100, '\n')
print(y_train.value_counts(), y_test.value_counts(), sep='\n')

(8000, 6)
(8000,)
(2000, 6)
(2000,)

 **************************************************************************************************** 

Square          2008
negative        2004
Side-to-Side    2000
Circle          1988
Name: gesture, dtype: int64
Circle          512
Side-to-Side    500
negative        496
Square          492
Name: gesture, dtype: int64


In [6]:
# Preview the first ten feature rows.
X.head(n=10)


Unnamed: 0,accx,accy,accz,gyrox,gyroy,gyroz
0,-0.004444,5.6e-05,-0.000718,-8.4e-05,-0.000193,-0.000155
1,-0.024944,0.001041,-0.003415,0.000217,-0.000366,-0.000166
2,-0.069159,0.003117,-0.009082,0.000711,-0.000698,-0.000216
3,-0.128699,0.005928,-0.016184,0.00111,-0.000986,-0.00031
4,-0.187071,0.008889,-0.022357,0.001302,-0.00093,-0.000379
5,-0.231421,0.011451,-0.026431,0.001473,-0.000445,-0.000339
6,-0.256712,0.013069,-0.02854,0.001759,0.000185,-0.000154
7,-0.264923,0.013538,-0.029417,0.002107,0.000572,0.000164
8,-0.261762,0.013169,-0.029863,0.002383,0.000602,0.000497
9,-0.253756,0.012403,-0.03036,0.00243,0.000455,0.000597


In [33]:
import requests

def download_file(url, file_name):
    """Download ``file_name`` from ``url`` into the current directory.

    Sends ``GET url?file_name=<file_name>`` and streams the body to disk. The
    local name comes from the ``filename=`` part of the Content-Disposition
    header; when the header is missing or has no usable name, ``file_name``
    is used instead of crashing.

    Args:
        url: Download endpoint.
        file_name: Value sent as the ``file_name`` query parameter.

    Returns:
        None. The outcome is reported with ``print``.
    """
    import os

    # The timeout stops the call from hanging forever on an unresponsive
    # server; the context manager returns the connection once streaming ends.
    with requests.get(
        url, params={'file_name': file_name}, stream=True, timeout=30
    ) as response:
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            return

        # Tolerate a missing header or a header without 'filename='.
        content_disposition = response.headers.get('Content-Disposition') or ''
        _, marker, raw_name = content_disposition.partition('filename=')
        raw_name = raw_name.split(';', 1)[0].strip().strip('"') if marker else ''

        # Keep only the final path component so a server-supplied name such as
        # '../../x' cannot write outside the current directory.
        filename = os.path.basename(raw_name.replace('\\', '/'))
        if not filename:
            filename = os.path.basename(str(file_name).replace('\\', '/')) or 'download'

        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"Downloaded: {filename}")

# Example request: http://localhost:8999/download?file_name=tmh_infineon2024-06-10

# Endpoint and artifact name; use "tmh_infineon2024-06-10" for the dated model.
file_name = "test"
url = "http://localhost:8999/download"

# Fetch the file into the working directory.
download_file(url=url, file_name=file_name)


Downloaded: test.pkl


In [6]:
import requests

def download_file(url, model_name, model_version):
    """Download one model version from ``url`` into the current directory.

    Sends ``GET url`` with ``model_name`` and ``model_version`` as query
    parameters and streams the body to disk. The local name comes from the
    ``filename=`` part of the Content-Disposition header, falling back to
    ``<model_name>_<model_version>.bin`` when no usable name is supplied.

    NOTE: this notebook also defines ``download_file(url, file_name)`` in an
    earlier cell; whichever definition ran last is the one in effect.

    Args:
        url: Download endpoint.
        model_name: Value sent as the ``model_name`` query parameter.
        model_version: Value sent as the ``model_version`` query parameter.

    Returns:
        None. The outcome is reported with ``print``.
    """
    import os

    query_params = {'model_name': model_name, 'model_version': model_version}

    # The timeout stops the call from hanging forever on an unresponsive
    # server; the context manager returns the connection once streaming ends.
    with requests.get(url, params=query_params, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            return

        # partition() copes with a header that has no 'filename=' part, where
        # split(...)[1] raised IndexError.
        content_disposition = response.headers.get('Content-Disposition') or ''
        _, marker, raw_name = content_disposition.partition('filename=')
        raw_name = raw_name.split(';', 1)[0].strip().strip('"') if marker else ''

        # Keep only the final path component so a server-supplied name cannot
        # write outside the current directory.
        filename = os.path.basename(raw_name.replace('\\', '/'))
        if not filename:
            # Fallback to a default name if the header is missing or unusable.
            filename = f"{model_name}_{model_version}.bin"

        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"Downloaded: {filename}")

# Model registry endpoint plus the model name/version to retrieve.
model_version = "rav37vrhtwcj4asc"
model_name = "trained_model_psoc6_2024-06-11"
url = "https://zenml-api.aidery.io/download_model"

# Fetch the model file into the working directory.
download_file(url=url, model_name=model_name, model_version=model_version)


Downloaded: trained_model_psoc6_2024-06-11_rav37vrhtwcj4asc.pkl


In [16]:
# Fix the seed so the fitted tree (and its feature importances) are the same
# on every run; an unseeded tree can differ between runs.
dtc = DecisionTreeClassifier(random_state=42)
model = dtc.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out accuracy, then train/test scores to expose over-fitting,
# followed by the confusion matrix and the per-class report.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9955
1.0
0.9955
[[507   4   0   1]
 [  0 497   0   3]
 [  1   0 491   0]
 [  0   0   0 496]]
              precision    recall  f1-score   support

      Circle       1.00      0.99      0.99       512
Side-to-Side       0.99      0.99      0.99       500
      Square       1.00      1.00      1.00       492
    negative       0.99      1.00      1.00       496

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [17]:
# from sklearn.externals import joblib
import joblib   

In [18]:
# Persist the fitted classifier next to the notebook.
joblib.dump(value=model, filename='model.pkl')

['model.pkl']

# Load model from download API

In [7]:
# Work on an independent copy so `df` keeps its original string labels.
my_df = df.copy(deep=True)
my_df.head(n=2)

Unnamed: 0,accx,accy,accz,gyrox,gyroy,gyroz,gesture
0,-0.004444,5.6e-05,-0.000718,-8.4e-05,-0.000193,-0.000155,Circle
1,-0.024944,0.001041,-0.003415,0.000217,-0.000366,-0.000166,Circle


In [8]:

# Integer code for each gesture label.
gesture_mapping = {'Circle': 1, 'Side-to-Side': 2, 'Square': 3, 'negative': 4}
# Map from the untouched string labels in `df` (my_df is a copy of df, so the
# indexes line up). Mapping my_df's own column was not re-runnable: on a
# second execution the already-encoded integers are not keys of the mapping
# and every label became NaN.
my_df['gesture'] = df['gesture'].map(gesture_mapping)
my_df.gesture.value_counts()

1    2500
2    2500
3    2500
4    2500
Name: gesture, dtype: int64

In [9]:

# Separate the integer-encoded target from the sensor features.
y = my_df['gesture']
X = my_df.drop(columns=['gesture'])

# Shuffled 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# Shapes of each part, a separator line, then the class balance of both sides.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')
print('\n', '*' * 100, '\n')
print(y_train.value_counts(), y_test.value_counts(), sep='\n')

(8000, 6)
(8000,)
(2000, 6)
(2000,)

 **************************************************************************************************** 

3    2008
4    2004
2    2000
1    1988
Name: gesture, dtype: int64
1    512
2    500
4    496
3    492
Name: gesture, dtype: int64


In [11]:
import joblib
# loaded_model = joblib.load('tmh_infineon2024-06-10.pkl')
# Load the model file that the download cell reported saving
# ("Downloaded: trained_model_psoc6_2024-06-11_rav37vrhtwcj4asc.pkl").
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a source you trust.
loaded_model = joblib.load('trained_model_psoc6_2024-06-11_rav37vrhtwcj4asc.pkl')
# Display the loaded estimator's feature importances.
loaded_model.feature_importances_

array([0.28238601, 0.11888574, 0.29384783, 0.0973617 , 0.11546801,
       0.0920507 ])

In [12]:
# Score the loaded model on the training split, passed as plain NumPy arrays.
train_acc = loaded_model.score(
    X=X_train.to_numpy(),
    y=y_train.to_numpy(),
)
print(train_acc)

1.0


In [14]:
# Record the installed scikit-learn version next to the results.
import sklearn
print(sklearn.__version__)

1.3.0


In [13]:
# Evaluate the currently loaded model on NumPy versions of the split.
# (Alternative model file: 'tmh_infineon2024-06-10.pkl'.)
y_pred = loaded_model.predict(X_test.to_numpy())

# Hold-out accuracy, train/test scores, confusion matrix, per-class report.
print("Accuracy:", metrics.accuracy_score(y_true=y_test.to_numpy(), y_pred=y_pred))
print(loaded_model.score(X_train.to_numpy(), y_train.to_numpy()))
print(loaded_model.score(X_test.to_numpy(), y_test.to_numpy()))
print(metrics.confusion_matrix(y_true=y_test.to_numpy(), y_pred=y_pred))
print(metrics.classification_report(y_true=y_test.to_numpy(), y_pred=y_pred))

Accuracy: 0.9955
1.0
0.9955
[[507   4   0   1]
 [  0 497   0   3]
 [  1   0 491   0]
 [  0   0   0 496]]
              precision    recall  f1-score   support

           1       1.00      0.99      0.99       512
           2       0.99      0.99      0.99       500
           3       1.00      1.00      1.00       492
           4       0.99      1.00      1.00       496

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [24]:
# Display the feature importances of whatever model `loaded_model` is bound to.
# NOTE(review): the recorded output here differs from the earlier display of
# loaded_model.feature_importances_, so a different model was bound when this
# cell ran; the result depends on execution order.
loaded_model.feature_importances_

array([0.16141822, 0.28893745, 0.20384134, 0.34580299, 0.        ,
       0.        ])

In [22]:
# X_test_array / y_test_array were not defined by any cell in this notebook,
# so this cell failed on a fresh kernel. Build them here from the current
# hold-out split to make the cell self-contained.
X_test_array = X_test.to_numpy()
y_test_array = y_test.to_numpy()
print(loaded_model.score(X_test_array, y_test_array))

0.0


In [16]:
# Persist the currently loaded model under a new file name.
joblib.dump(loaded_model, 'tmh_infineon2024-06-07_with_feature_names.pkl')

# Reload the file just written into a separate variable.
# NOTE(review): re-saving by itself presumably does not change whether the
# estimator carries feature names; that would depend on how it was fitted. Confirm.
loaded_modeled = joblib.load('tmh_infineon2024-06-07_with_feature_names.pkl')

In [20]:
# Reload the locally saved classifier and predict on the hold-out features.
# NOTE: this evaluation is repeated verbatim in other cells of this notebook.
loaded_model = joblib.load(filename='model.pkl')
y_pred = loaded_model.predict(X_test)

# Hold-out accuracy, train/test scores, confusion matrix, per-class report.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print(loaded_model.score(X_train, y_train))
print(loaded_model.score(X_test, y_test))
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9955
1.0
0.9955
[[507   4   0   1]
 [  0 497   0   3]
 [  1   0 491   0]
 [  0   0   0 496]]
              precision    recall  f1-score   support

      Circle       1.00      0.99      0.99       512
Side-to-Side       0.99      0.99      0.99       500
      Square       1.00      1.00      1.00       492
    negative       0.99      1.00      1.00       496

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [21]:
# Mean accuracy of the currently loaded model on the training split.
loaded_model.score(X_train, y_train)

1.0

In [22]:
# Display the feature importances of the currently loaded model.
loaded_model.feature_importances_

array([0.27981737, 0.12680591, 0.29179539, 0.10584079, 0.11021145,
       0.08552909])

In [32]:
# Load 'model.pkl' into a second variable (note the plural name, distinct from
# `loaded_model`) and report its mean accuracy on the training split.
loaded_models = joblib.load('model.pkl')
loaded_models.score(X_train, y_train)

1.0

In [33]:
# Display the feature importances of the model held in `loaded_models`.
loaded_models.feature_importances_

array([0.28025221, 0.1229093 , 0.2945143 , 0.10601968, 0.11637407,
       0.07993043])

In [27]:
# Reload the locally saved classifier and predict on the hold-out features.
# NOTE: this evaluation is repeated verbatim in other cells of this notebook.
loaded_model = joblib.load(filename='model.pkl')
y_pred = loaded_model.predict(X_test)

# Hold-out accuracy, train/test scores, confusion matrix, per-class report.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print(loaded_model.score(X_train, y_train))
print(loaded_model.score(X_test, y_test))
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9955
1.0
0.9955
[[507   4   0   1]
 [  0 497   0   3]
 [  1   0 491   0]
 [  0   0   0 496]]
              precision    recall  f1-score   support

      Circle       1.00      0.99      0.99       512
Side-to-Side       0.99      0.99      0.99       500
      Square       1.00      1.00      1.00       492
    negative       0.99      1.00      1.00       496

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [14]:
# Reload the locally saved classifier and predict on the hold-out features.
# NOTE: this evaluation is repeated verbatim in other cells of this notebook.
loaded_model = joblib.load(filename='model.pkl')
y_pred = loaded_model.predict(X_test)

# Hold-out accuracy, train/test scores, confusion matrix, per-class report.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print(loaded_model.score(X_train, y_train))
print(loaded_model.score(X_test, y_test))
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))



Accuracy: 0.9955
1.0
0.9955
[[507   4   0   1]
 [  0 497   0   3]
 [  1   0 491   0]
 [  0   0   0 496]]
              precision    recall  f1-score   support

      Circle       1.00      0.99      0.99       512
Side-to-Side       0.99      0.99      0.99       500
      Square       1.00      1.00      1.00       492
    negative       0.99      1.00      1.00       496

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [1]:
import joblib
import os

# Directory that holds the previously saved models.
directory = "/app/Train_model_deploy_model/my_model/"
model_file_path = os.path.join(directory, "model_2024-05-03_08-15-11.pkl")

# This cell only loads an existing file, so it no longer calls os.makedirs:
# creating an empty directory before a load just hides a wrong path behind a
# later FileNotFoundError.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a source you trust.
loaded_model = joblib.load(model_file_path)

# The message used to say "Model save as:", but nothing is saved here.
print("Loaded model:", loaded_model)

Model save as: DecisionTreeClassifier()


In [3]:
import joblib
import os

# Directory that holds the previously saved models.
directory = "/app/Train_model_deploy_model/my_model/"
model_file_path = os.path.join(directory, "model_2024-05-03_08-21-13.pkl")

# This cell only loads an existing file, so it no longer calls os.makedirs:
# creating an empty directory before a load just hides a wrong path behind a
# later FileNotFoundError.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a source you trust.
loaded_model = joblib.load(model_file_path)

# The message used to say "Model save as:", but nothing is saved here.
print("Loaded model:", loaded_model)
# Show the hyper-parameters the loaded estimator was configured with.
loaded_model.get_params()

Model save as: RandomForestClassifier()


{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Show the hyper-parameters of `model`, the classifier fitted in the training
# cell. This cell has no recorded execution, and it needs the training cell to
# have been run in the current kernel first.
model.get_params()