In [1]:
#import
from IPython.display import Image
%matplotlib inline
import pandas as pd
from io import StringIO
import sys

In [2]:
csv_data = \
'''A,B,C,D
1.0,2.0,3.0,4.0
5.0,6.0,,8.0
10.0,11.0,12.0,'''

In [3]:
df = pd.read_csv(StringIO(csv_data))

In [4]:
df

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0
1,5.0,6.0,,8.0
2,10.0,11.0,12.0,


In [5]:
#Access the underlying numpy array via 'values' attribute
df.values

array([[ 1.,  2.,  3.,  4.],
       [ 5.,  6., nan,  8.],
       [10., 11., 12., nan]])

In [6]:
#Remove rows that contain missing values 
df.dropna(axis = 0)

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0


In [7]:
#missing values of each column
df.isnull().sum()

A    0
B    0
C    1
D    1
dtype: int64

In [8]:
#Remove columns that contains missing values 
df.dropna(axis = 1)

Unnamed: 0,A,B
0,1.0,2.0
1,5.0,6.0
2,10.0,11.0


In [9]:
#Drop only rows with all columns with NaN
df.dropna(how='all')

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0
1,5.0,6.0,,8.0
2,10.0,11.0,12.0,


In [10]:
#Only drop rows where NaN appear in specific columns (here: column 'C')
df.dropna(subset=['C']) 

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0
2,10.0,11.0,12.0,


In [11]:
#Drop rows that have fewer than 3 real values
df.dropna(thresh = 4)

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0


In [12]:
#Check original Array
df.values

array([[ 1.,  2.,  3.,  4.],
       [ 5.,  6., nan,  8.],
       [10., 11., 12., nan]])

In [13]:
#Impute missing values by using the column Mean 
from sklearn.impute import SimpleImputer
import numpy as np

In [14]:
imr = SimpleImputer(missing_values=np.nan, strategy='mean')
imr = imr.fit(df.values)
imputed_data = imr.transform(df.values)
imputed_data

array([[ 1. ,  2. ,  3. ,  4. ],
       [ 5. ,  6. ,  7.5,  8. ],
       [10. , 11. , 12. ,  6. ]])

In [15]:
df.fillna(df.mean())

Unnamed: 0,A,B,C,D
0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.5,8.0
2,10.0,11.0,12.0,6.0


In [16]:
import pandas as pd 
df = pd.DataFrame([ 
['green', 'M', 10.1, 'class2'], 
['red', 'L', 13.5, 'class1'], 
['blue', 'XL', 15.3, 'class2']]) 
df.columns = ['color', 'size', 'price', 'classlabel'] 
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [17]:
size_mapping = { 
'XL': 3, 
'L': 2, 
'M': 1} 
df['size'] = df['size'].map(size_mapping) 
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [18]:
inv_size_mapping = {v: k for k, v in size_mapping.items()} 
df['size'].map(inv_size_mapping)

0     M
1     L
2    XL
Name: size, dtype: object

In [19]:
#Create mapping dict to convert class labels from strings to integers 
class_mapping = {label: idx for idx, label in 
enumerate(np.unique(df['classlabel']))} 
class_mapping 

{'class1': 0, 'class2': 1}

In [20]:
#To convert class labels from strings to integers 
df['classlabel'] = df['classlabel'].map(class_mapping) 
df 

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,1
1,red,2,13.5,0
2,blue,3,15.3,1


In [21]:
from sklearn.preprocessing import LabelEncoder 
#Label encoding with sklearn's LabelEncoder 
class_le = LabelEncoder() 
y = class_le.fit_transform(df['classlabel'].values) 
y 

array([1, 0, 1])

In [22]:
#Reverse mapping 
class_le.inverse_transform(y)

array([1, 0, 1])

In [23]:
X = df[['color', 'size', 'price']].values 
color_le = LabelEncoder() 
X[:, 0] = color_le.fit_transform(X[:, 0]) 
X

array([[1, 1, 10.1],
       [2, 2, 13.5],
       [0, 3, 15.3]], dtype=object)

In [24]:
from sklearn.preprocessing import OneHotEncoder 
X = df[['color', 'size', 'price']].values 
color_ohe = OneHotEncoder() 
color_ohe.fit_transform(X[:, 0].reshape(-1, 1)).toarray() 
from sklearn.compose import ColumnTransformer 
X = df[['color', 'size', 'price']].values 
c_transf = ColumnTransformer([ ('onehot', OneHotEncoder(), [0]), 
('nothing', 'passthrough', [1, 2])]) 
c_transf.fit_transform(X).astype(float)

array([[ 0. ,  1. ,  0. ,  1. , 10.1],
       [ 0. ,  0. ,  1. ,  2. , 13.5],
       [ 1. ,  0. ,  0. ,  3. , 15.3]])

In [25]:
#One-hot encoding via pandas 
pd.get_dummies(df[['price', 'color', 'size']]) 
#Multicollinearity guard in get_dummies 
pd.get_dummies(df[['price', 'color', 'size']], drop_first=True) 
 
#Multicollinearity guard for the OneHotEncoder 
color_ohe = OneHotEncoder(categories='auto', drop='first') 
c_transf = ColumnTransformer([('onehot', color_ohe, [0]), 
                              ('nothing', 'passthrough', [1, 2])]) 
c_transf.fit_transform(X).astype(float)

array([[ 1. ,  0. ,  1. , 10.1],
       [ 0. ,  1. ,  2. , 13.5],
       [ 0. ,  0. ,  3. , 15.3]])

In [26]:
# Load the dataset from the URL 
df_wine = pd.read_csv('https://archive.ics.uci.edu/' 
                    'ml/machine-learning-databases/wine/wine.data', 
                    header=None) 
 
# If using an offline dataset, uncomment the following line and comment 
#the URL line above 
# df_wine = pd.read_csv('wine.data', header=None) 
 
# Assign column names to the DataFrame 
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 
                   'Alcalinity of ash', 'Magnesium', 'Total phenols', 
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 
                   'Proline']
 
 # Print the unique class labels 
print('Class labels:', np.unique(df_wine['Class label'])) 
 
# Display the first few rows of the DataFrame 
df_wine.head() 

Class labels: [1 2 3]


Unnamed: 0,Class label,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [27]:
from sklearn.model_selection import train_test_split

X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values

X_train, X_test, y_train, y_test = \
    train_test_split(X, y,
                     test_size=0.3,
                     random_state=0,
                     stratify=y)

In [28]:
from sklearn.preprocessing import MinMaxScaler 
 
mms = MinMaxScaler() 
X_train_norm = mms.fit_transform(X_train) 
X_test_norm = mms.transform(X_test)

In [29]:
print("X_train_norm:\n", X_train_norm)

X_train_norm:
 [[0.64619883 0.83201581 0.4248366  ... 0.45744681 0.28571429 0.19400856]
 [0.6871345  0.15612648 0.65359477 ... 0.81914894 0.63369963 0.68259629]
 [0.67836257 0.15019763 0.65359477 ... 0.75531915 0.52747253 0.71825963]
 ...
 [0.72222222 0.84980237 0.34640523 ... 0.10638298 0.02197802 0.09771755]
 [0.16081871 0.06916996 0.39215686 ... 0.54255319 0.68131868 0.43366619]
 [0.37719298 0.61857708 0.45751634 ... 0.75531915 0.68131868 0.13195435]]


In [30]:
print("First 5 rows of X_train_norm:\n", X_train_norm[:5])

First 5 rows of X_train_norm:
 [[0.64619883 0.83201581 0.4248366  0.46236559 0.27160494 0.35172414
  0.09704641 0.68       0.18987342 0.23623446 0.45744681 0.28571429
  0.19400856]
 [0.6871345  0.15612648 0.65359477 0.43548387 0.7654321  0.67931034
  0.50632911 0.74       0.2943038  0.3250444  0.81914894 0.63369963
  0.68259629]
 [0.67836257 0.15019763 0.65359477 0.59677419 0.38271605 0.69655172
  0.61392405 0.32       0.62025316 0.35168739 0.75531915 0.52747253
  0.71825963]
 [0.61403509 0.20948617 0.62091503 0.40860215 0.49382716 0.47241379
  0.46202532 0.32       0.35443038 0.21847247 0.65957447 0.58608059
  0.58273894]
 [0.34795322 0.33992095 0.32679739 0.38172043 0.2962963  0.22068966
  0.06751055 1.         0.16455696 0.47602131 0.26595745 0.11355311
  0.29743224]]


In [31]:
from sklearn.preprocessing import StandardScaler 
stdsc = StandardScaler() 
X_train_std = stdsc.fit_transform(X_train) 
X_test_std = stdsc.transform(X_test) 

In [32]:
print("X_train_std:\n", X_train_std)

X_train_std:
 [[ 0.71225893  2.22048673 -0.13025864 ... -0.20017028 -0.82164144
  -0.62946362]
 [ 0.88229214 -0.70457155  1.17533605 ...  1.33982592  0.54931269
   1.47568796]
 [ 0.84585645 -0.73022996  1.17533605 ...  1.06806189  0.1308109
   1.62934866]
 ...
 [ 1.02803489  2.29746195 -0.5778911  ... -1.69487249 -1.86068037
  -1.04434751]
 [-1.30384913 -1.08089484 -0.31677217 ...  0.16218176  0.73691694
   0.40313628]
 [-0.40510216  1.29678411  0.05625489 ...  1.06806189  0.73691694
  -0.89683324]]


In [33]:
print("First 5 rows of X_train_std:\n", X_train_std[:5])

First 5 rows of X_train_std:
 [[ 0.71225893  2.22048673 -0.13025864  0.05962872 -0.50432733 -0.52831584
  -1.24000033  0.84118003 -1.05215112 -0.29218864 -0.20017028 -0.82164144
  -0.62946362]
 [ 0.88229214 -0.70457155  1.17533605 -0.09065504  2.34147876  1.01675879
   0.66299475  1.0887425  -0.49293533  0.13152077  1.33982592  0.54931269
   1.47568796]
 [ 0.84585645 -0.73022996  1.17533605  0.81104754  0.13597904  1.09807851
   1.16326665 -0.64419483  1.25249578  0.25863359  1.06806189  0.1308109
   1.62934866]
 [ 0.57866141 -0.4736459   0.98882252 -0.24093881  0.77628541  0.04092218
   0.45700044 -0.64419483 -0.1709626  -0.37693052  0.66041583  0.36170844
   1.045438  ]
 [-0.52655446  0.09083903 -0.68979922 -0.39122257 -0.36203702 -1.1463457
  -1.37732987  2.16151323 -1.18771859  0.85182676 -1.01546239 -1.49990297
  -0.18384759]]


In [34]:
ex = np.array([0, 1, 2, 3, 4, 5]) 
print('standarized:', (ex - ex.mean()) / ex.std()) 
#normalize 
print('normalized', (ex - ex.min()) / (ex.max() - ex.min())) 

standarized: [-1.46385011 -0.87831007 -0.29277002  0.29277002  0.87831007  1.46385011]
normalized [0.  0.2 0.4 0.6 0.8 1. ]


In [1]:
pip install openvino

Collecting openvino
  Downloading openvino-2025.3.0-19807-cp313-cp313-win_amd64.whl.metadata (13 kB)
Collecting numpy<2.3.0,>=1.16.6 (from openvino)
  Downloading numpy-2.2.6-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting openvino-telemetry>=2023.2.1 (from openvino)
  Downloading openvino_telemetry-2025.2.0-py3-none-any.whl.metadata (2.3 kB)
Downloading openvino-2025.3.0-19807-cp313-cp313-win_amd64.whl (40.6 MB)
   ---------------------------------------- 0.0/40.6 MB ? eta -:--:--
   ---- ----------------------------------- 4.2/40.6 MB 35.9 MB/s eta 0:00:02
   ---- ----------------------------------- 4.2/40.6 MB 35.9 MB/s eta 0:00:02
   ---- ----------------------------------- 4.2/40.6 MB 35.9 MB/s eta 0:00:02
   ---- ----------------------------------- 4.2/40.6 MB 35.9 MB/s eta 0:00:02
   ---- ----------------------------------- 4.2/40.6 MB 35.9 MB/s eta 0:00:02
   ------- -------------------------------- 7.3/40.6 MB 5.6 MB/s eta 0:00:06
   ------- -----------------------------

In [3]:
from openvino.runtime import Core

In [4]:
core = Core()
devices = core.avaiable_devices
print("Available devices", devices)

AttributeError: 'Core' object has no attribute 'avaiable_devices'