# Preparing data

In [None]:
# import
import pandas as pd
import numpy as np

In [None]:
# read the penguins CSV straight from GitHub (the URL contains a commit hash)
data = pd.read_csv(
    "https://github.com/allisonhorst/palmerpenguins/raw/"
    "5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv"
)
# display the loaded frame
data

Unnamed: 0,species_short,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE
...,...,...,...,...,...,...,...
339,Gentoo,Biscoe,,,,,
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,FEMALE
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,MALE
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,FEMALE


In [None]:
# Shuffle the rows and rebuild a clean 0..n-1 index.
# A fixed random_state makes the shuffle reproducible, so everything further
# down that depends on row order (database ids, printed rows) is the same on
# every "Restart & Run All".
data = data.sample(frac=1, random_state=1337).reset_index(drop=True)

# check data
data.head()

Unnamed: 0,species_short,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Gentoo,Biscoe,44.9,13.3,213.0,5100.0,FEMALE
1,Adelie,Dream,41.3,20.3,194.0,3550.0,MALE
2,Adelie,Torgersen,38.6,17.0,188.0,2900.0,FEMALE
3,Gentoo,Biscoe,41.7,14.7,210.0,4700.0,FEMALE
4,Chinstrap,Dream,50.8,18.5,201.0,4450.0,MALE


In [None]:
# summary statistics of the numeric columns
print(data.describe())
print("---------------------------------------------------------------", "\n\n")

# column dtypes and non-null counts
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output
data.info()
print("---------------------------------------------------------------", "\n\n")

# (number of rows, number of columns)
print(data.shape)

       culmen_length_mm  culmen_depth_mm  flipper_length_mm  body_mass_g
count        342.000000       342.000000         342.000000   342.000000
mean          43.921930        17.151170         200.915205  4201.754386
std            5.459584         1.974793          14.061714   801.954536
min           32.100000        13.100000         172.000000  2700.000000
25%           39.225000        15.600000         190.000000  3550.000000
50%           44.450000        17.300000         197.000000  4050.000000
75%           48.500000        18.700000         213.000000  4750.000000
max           59.600000        21.500000         231.000000  6300.000000
--------------------------------------------------------------- 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species_short      344 non-null    object 
 1   island             344 

In [None]:
# remove every row that has at least one missing value, in any column
data = data.dropna(axis=0, how="any")
# display the result
data

Unnamed: 0,species_short,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Gentoo,Biscoe,44.9,13.3,213.0,5100.0,FEMALE
1,Adelie,Dream,41.3,20.3,194.0,3550.0,MALE
2,Adelie,Torgersen,38.6,17.0,188.0,2900.0,FEMALE
3,Gentoo,Biscoe,41.7,14.7,210.0,4700.0,FEMALE
4,Chinstrap,Dream,50.8,18.5,201.0,4450.0,MALE
...,...,...,...,...,...,...,...
339,Gentoo,Biscoe,46.4,15.6,221.0,5000.0,MALE
340,Gentoo,Biscoe,50.0,16.3,230.0,5700.0,MALE
341,Adelie,Dream,37.0,16.9,185.0,3000.0,FEMALE
342,Adelie,Dream,37.2,18.1,178.0,3900.0,MALE


In [None]:
# dictionary that maps each species name to an integer class label
specie_map = {'Adelie': 1, 'Gentoo': 2, 'Chinstrap': 3}

# Build the encoded column first so it can be validated: Series.map() yields NaN
# for every value that is not a key of the dictionary (an unexpected species
# name, or labels that are already encoded because this cell was run twice).
species_codes = data['species_short'].map(specie_map)
if species_codes.isna().any():
    unknown = sorted(data.loc[species_codes.isna(), 'species_short'].astype(str).unique())
    raise ValueError(f"species_short contains values missing from specie_map: {unknown}")

# assign() returns a new frame instead of writing a column into the result of
# dropna(), which can trigger pandas' SettingWithCopyWarning
data = data.assign(species_short=species_codes.astype("int64"))
# check the data
data

Unnamed: 0,species_short,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,2,Biscoe,44.9,13.3,213.0,5100.0,FEMALE
1,1,Dream,41.3,20.3,194.0,3550.0,MALE
2,1,Torgersen,38.6,17.0,188.0,2900.0,FEMALE
3,2,Biscoe,41.7,14.7,210.0,4700.0,FEMALE
4,3,Dream,50.8,18.5,201.0,4450.0,MALE
...,...,...,...,...,...,...,...
339,2,Biscoe,46.4,15.6,221.0,5000.0,MALE
340,2,Biscoe,50.0,16.3,230.0,5700.0,MALE
341,1,Dream,37.0,16.9,185.0,3000.0,FEMALE
342,1,Dream,37.2,18.1,178.0,3900.0,MALE


In [None]:
# the species label is what we want to predict, so call the column "target"
data = data.rename(columns={'species_short': 'target'})

# move "target" to the last position, keeping the other columns in their order
cols = data.columns.tolist()
cols.append(cols.pop(cols.index('target')))
data = data[cols]
# check the data
data

Unnamed: 0,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex,target
0,Biscoe,44.9,13.3,213.0,5100.0,FEMALE,2
1,Dream,41.3,20.3,194.0,3550.0,MALE,1
2,Torgersen,38.6,17.0,188.0,2900.0,FEMALE,1
3,Biscoe,41.7,14.7,210.0,4700.0,FEMALE,2
4,Dream,50.8,18.5,201.0,4450.0,MALE,3
...,...,...,...,...,...,...,...
339,Biscoe,46.4,15.6,221.0,5000.0,MALE,2
340,Biscoe,50.0,16.3,230.0,5700.0,MALE,2
341,Dream,37.0,16.9,185.0,3000.0,FEMALE,1
342,Dream,37.2,18.1,178.0,3900.0,MALE,1


In [None]:
# discard the two columns that are not used as features
data = data.drop(columns=["island", "sex"])
# check the data
data

Unnamed: 0,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,target
0,44.9,13.3,213.0,5100.0,2
1,41.3,20.3,194.0,3550.0,1
2,38.6,17.0,188.0,2900.0,1
3,41.7,14.7,210.0,4700.0,2
4,50.8,18.5,201.0,4450.0,3
...,...,...,...,...,...
339,46.4,15.6,221.0,5000.0,2
340,50.0,16.3,230.0,5700.0,2
341,37.0,16.9,185.0,3000.0,1
342,37.2,18.1,178.0,3900.0,1


In [None]:
# bind the prepared data to the name used by the database cells;
# dropna() returns a new DataFrame with any row containing a missing value removed
penguins_df = data.dropna(axis=0)

# Creating a SQLite database

In [None]:
# import
import sqlite3

In [None]:
# open the SQLite database file "Penguins_data.db" and get a cursor for running statements
conn = sqlite3.connect(database="Penguins_data.db")
cursor = conn.cursor()

In [None]:
# (re)create the table for the penguins dataset
# Penguins_data.db is a file on disk, so the table and its rows survive kernel
# restarts. Dropping the table first makes every full run start from an empty
# table instead of appending another copy of the dataset to the old rows.
cursor.execute("DROP TABLE IF EXISTS Penguins_data")
cursor.execute("""
CREATE TABLE IF NOT EXISTS Penguins_data (
    id INTEGER PRIMARY KEY,
    culmen_length_mm REAL,
    culmen_depth_mm REAL,
    flipper_length_mm REAL,
    body_mass_g REAL,
    target INTEGER
)
""")

<sqlite3.Cursor at 0x7fd3a4cde960>

# Loading the prepared DataFrame into the SQLite table

In [None]:
# insert data into the table
# the columns are selected explicitly so the order of each tuple always matches
# the order of the "?" placeholders in the statement
insert_columns = ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm", "body_mass_g", "target"]

# executemany() runs the statement once per tuple; `with conn` commits when all
# inserts succeed and rolls the transaction back if one of them raises
with conn:
    conn.executemany(
        "INSERT INTO Penguins_data (culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g, target) VALUES (?, ?, ?, ?, ?)",
        penguins_df[insert_columns].itertuples(index=False, name=None),
    )

# Running SQLite commands

- SELECT
- INSERT 
- UPDATE
- DELETE

## Select

In [None]:
# fetch every row of the table
select_query = "SELECT * FROM Penguins_data"
cursor = conn.execute(select_query)
rows = list(cursor)

# print one record (a tuple of column values) per line
for record in rows:
    print(record)

(1, 44.9, 13.3, 213.0, 5100.0, 2)
(2, 41.3, 20.3, 194.0, 3550.0, 1)
(3, 38.6, 17.0, 188.0, 2900.0, 1)
(4, 41.7, 14.7, 210.0, 4700.0, 2)
(5, 50.8, 18.5, 201.0, 4450.0, 3)
(6, 45.4, 18.7, 188.0, 3525.0, 3)
(7, 43.3, 14.0, 208.0, 4575.0, 2)
(8, 55.9, 17.0, 228.0, 5600.0, 2)
(9, 40.2, 17.1, 193.0, 3400.0, 1)
(10, 41.1, 18.6, 189.0, 3325.0, 1)
(11, 45.2, 15.8, 215.0, 5300.0, 2)
(12, 51.0, 18.8, 203.0, 4100.0, 3)
(13, 47.2, 13.7, 214.0, 4925.0, 2)
(14, 38.6, 17.2, 199.0, 3750.0, 1)
(15, 39.6, 17.2, 196.0, 3550.0, 1)
(16, 42.2, 18.5, 180.0, 3550.0, 1)
(17, 46.0, 18.9, 195.0, 4150.0, 3)
(18, 41.3, 21.1, 195.0, 4400.0, 1)
(19, 40.8, 18.9, 208.0, 4300.0, 1)
(20, 41.1, 18.2, 192.0, 4050.0, 1)
(21, 45.5, 17.0, 196.0, 3500.0, 3)
(22, 43.4, 14.4, 218.0, 4600.0, 2)
(23, 41.5, 18.5, 201.0, 4000.0, 1)
(24, 35.7, 18.0, 202.0, 3550.0, 1)
(25, 40.2, 17.0, 176.0, 3450.0, 1)
(26, 40.3, 18.5, 196.0, 4350.0, 1)
(27, 46.1, 13.2, 211.0, 4500.0, 2)
(28, 36.7, 18.8, 187.0, 3800.0, 1)
(29, 34.6, 21.1, 198.0, 4400.

## Insert

In [None]:
# insert query
# the values are bound through "?" placeholders instead of being written into
# the SQL text, so the statement and the data stay separate
insert_query = "INSERT INTO Penguins_data (culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g, target) VALUES (?, ?, ?, ?, ?);"
new_penguin = (50.1, 18.1, 191.2, 3250.0, 3)
conn.execute(insert_query, new_penguin)
conn.commit()

In [None]:
# look the inserted row up by its flipper length and show the first match
cursor = conn.execute("SELECT * FROM Penguins_data WHERE flipper_length_mm = 191.2;")
inserted_row = cursor.fetchone()
print(inserted_row)

(335, 50.1, 18.1, 191.2, 3250.0, 3)


## Update

In [None]:
# update a record in the 'Penguins_data' table
# the new depth and the row id are bound as a real float and a real int; the
# original quoted them as SQL strings ('13.2', '1') and relied on SQLite
# converting text to the column types
update_query = "UPDATE Penguins_data SET culmen_depth_mm = ? WHERE id = ?;"
conn.execute(update_query, (13.2, 1))
conn.commit()

In [None]:
# verify the update
# the id is bound as an integer parameter rather than compared to the string '1'
cursor = conn.execute("SELECT * FROM Penguins_data WHERE id = ?;", (1,))
print(cursor.fetchone())

(1, 44.9, 13.2, 213.0, 5100.0, 2)


## Delete

In [None]:
# delete a record from the 'Penguins_data' table
# the id is bound as an integer parameter rather than compared to the string '325'
delete_query = "DELETE FROM Penguins_data WHERE id = ?;"
conn.execute(delete_query, (325,))
conn.commit()

In [None]:
# verify the deletion: fetchone() returns None when no row has this id
# the id is bound as an integer parameter rather than compared to the string '325'
cursor = conn.execute("SELECT * FROM Penguins_data WHERE id = ?;", (325,))
print(cursor.fetchone())

None


# ML - Classification

In [None]:
# import
from sklearn.model_selection import train_test_split

# select the four features and the target from the database
# conn.execute() creates its own cursor, so this cell does not depend on which
# cell assigned `cursor` last. The result gets its own name instead of
# overwriting `data`, which holds the DataFrame used by the preparation cells.
penguin_rows = conn.execute("SELECT culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g, target FROM Penguins_data").fetchall()
if not penguin_rows:
    raise ValueError("Penguins_data is empty - run the cells that create and fill the table first")

# prepare train and test: the first four values of each row are the features,
# the fifth is the class label
X = np.array([row[:4] for row in penguin_rows])
y = np.array([row[4] for row in penguin_rows])

# hold out 20% of the rows for testing; fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1337)

In [None]:
# import
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# load the model
# SVC (RBF kernel by default) is sensitive to feature scale, and the features
# here span very different ranges (culmen depth is in the tens, body mass in
# the thousands). The scaler is part of the pipeline, so it is fitted on the
# training data only and applied automatically in every later predict() call,
# including after the model is saved and reloaded.
model = make_pipeline(StandardScaler(), SVC())
# fit the model
model.fit(X_train, y_train)

# make a prediction
y_pred = model.predict(X_test)

# print the accuracy of the predicted output
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.70


In [None]:
# classify one hand-written sample
# value order: culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g
X_new = [47.9, 14.2, 222.0, 3100.0]
# predict() expects a 2-D input, so the single sample is wrapped in a list
single_sample = [X_new]
model.predict(single_sample)

array([1])

In [None]:
# import
import joblib

# write the fitted model to "svc_model.joblib" in the working directory
joblib.dump(value=model, filename="svc_model.joblib")

['svc_model.joblib']

# Gradio

In [None]:
!pip install gradio -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.1/144.1 KB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.5/129.5 KB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 KB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 KB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 KB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.8/57.8 KB[0m [31m6.3 MB/s[0

In [None]:
# read the classifier back from the file written by joblib.dump
# NOTE(review): joblib files are pickle-based; only load files you created yourself
clf = joblib.load(filename='svc_model.joblib')

In [None]:
# gradio must be imported before `gr` is used; without this line the cell
# raises NameError on a fresh kernel
import gradio as gr


def classify(x1, x2, x3, x4):
    """Predict the class of one penguin with the loaded model.

    Args:
        x1, x2, x3, x4: the four numeric feature values, in the order the model
            was trained on (culmen_length_mm, culmen_depth_mm,
            flipper_length_mm, body_mass_g).

    Returns:
        The predicted class label converted to a string.
    """
    # predict() expects a 2-D input: one row holding the four features
    input_list = [[x1, x2, x3, x4]]
    y_pred = clf.predict(input_list)

    # Return the predicted output as a string
    return str(y_pred[0])


# Keep the Interface object itself in `demo` so it can be reused or closed
# later; launch() is called separately because its return value is not the
# Interface.
demo = gr.Interface(fn=classify, inputs=["number", "number", "number", "number"], outputs="text", title="SVM Classifier", description="Enter four values to classify.")
demo.launch();

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>