* A **Case-based reasoning(CBR)** is a paradigm of artificial intelligence and cognitive science that models the reasoning process as primarily memory based. Case-based reasoning systems solve new problems by retrieving stored ‘cases’ describing similar prior problem-solving episodes and adapting their solutions to fit new needs.

In [1]:
#Importing the necessary libraries
import numpy as np
import pandas as pd
from scipy.spatial import distance
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Loading the dataset
library = pd.read_csv('D:\\SLIIT\\3rd year 2nd sem\\Machine Learning amd Optimization Methods\\Coding\\library.csv')
cases = pd.read_csv('D:\\SLIIT\\3rd year 2nd sem\\Machine Learning amd Optimization Methods\\Coding\\library.csv')

In [3]:
library

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,False,No
1,Sunny,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Rainy,Mild,High,False,Yes
4,Rainy,Cool,Normal,False,Yes
5,Rainy,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Sunny,Mild,High,False,No
8,Sunny,Cool,Normal,False,Yes
9,Rainy,Mild,Normal,False,Yes


In [4]:
cases

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,False,No
1,Sunny,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Rainy,Mild,High,False,Yes
4,Rainy,Cool,Normal,False,Yes
5,Rainy,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Sunny,Mild,High,False,No
8,Sunny,Cool,Normal,False,Yes
9,Rainy,Mild,Normal,False,Yes


In [6]:
#Exploring the library.csv dataset
library.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Outlook       14 non-null     object
 1    Temperature  14 non-null     object
 2    Humidity     14 non-null     object
 3    Windy        14 non-null     object
 4    Play         14 non-null     object
dtypes: object(5)
memory usage: 688.0+ bytes


In [7]:
library.describe()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
count,14,14,14,14,14
unique,3,3,2,2,2
top,Sunny,Mild,High,False,Yes
freq,5,6,7,8,9


In [8]:
library.isnull().sum()

Outlook         0
 Temperature    0
 Humidity       0
 Windy          0
 Play           0
dtype: int64

In [9]:
library.dtypes

Outlook         object
 Temperature    object
 Humidity       object
 Windy          object
 Play           object
dtype: object

In [10]:
#Exploring the cases.csv dataset
cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Outlook       14 non-null     object
 1    Temperature  14 non-null     object
 2    Humidity     14 non-null     object
 3    Windy        14 non-null     object
 4    Play         14 non-null     object
dtypes: object(5)
memory usage: 688.0+ bytes


In [11]:
cases.describe()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
count,14,14,14,14,14
unique,3,3,2,2,2
top,Sunny,Mild,High,False,Yes
freq,5,6,7,8,9


In [12]:
cases.isnull().sum()

Outlook         0
 Temperature    0
 Humidity       0
 Windy          0
 Play           0
dtype: int64

In [13]:
cases.dtypes

Outlook         object
 Temperature    object
 Humidity       object
 Windy          object
 Play           object
dtype: object

# Converting the categorical variables into integers

In [16]:
#Selecting the columns from library to use as base cases, except solutions
base = library.iloc[:,range(library.shape[1] - 1)] #Exclude the last column
base

Unnamed: 0,Outlook,Temperature,Humidity,Windy
0,Sunny,Hot,High,False
1,Sunny,Hot,High,True
2,Overcast,Hot,High,False
3,Rainy,Mild,High,False
4,Rainy,Cool,Normal,False
5,Rainy,Cool,Normal,True
6,Overcast,Cool,Normal,True
7,Sunny,Mild,High,False
8,Sunny,Cool,Normal,False
9,Rainy,Mild,Normal,False


In [17]:
#Initial one-hot encoding
base = pd.get_dummies(base)
problems = pd.get_dummies(cases)
base

Unnamed: 0,Outlook_Overcast,Outlook_Rainy,Outlook_Sunny,Temperature_ Cool,Temperature_ Hot,Temperature_ Mild,Humidity_ High,Humidity_ Normal,Windy_ False,Windy_ True
0,0,0,1,0,1,0,1,0,1,0
1,0,0,1,0,1,0,1,0,0,1
2,1,0,0,0,1,0,1,0,1,0
3,0,1,0,0,0,1,1,0,1,0
4,0,1,0,1,0,0,0,1,1,0
5,0,1,0,1,0,0,0,1,0,1
6,1,0,0,1,0,0,0,1,0,1
7,0,0,1,0,0,1,1,0,1,0
8,0,0,1,1,0,0,0,1,1,0
9,0,1,0,0,0,1,0,1,1,0


In [18]:
problems

Unnamed: 0,Outlook_Overcast,Outlook_Rainy,Outlook_Sunny,Temperature_ Cool,Temperature_ Hot,Temperature_ Mild,Humidity_ High,Humidity_ Normal,Windy_ False,Windy_ True,Play_ No,Play_ Yes
0,0,0,1,0,1,0,1,0,1,0,1,0
1,0,0,1,0,1,0,1,0,0,1,1,0
2,1,0,0,0,1,0,1,0,1,0,0,1
3,0,1,0,0,0,1,1,0,1,0,0,1
4,0,1,0,1,0,0,0,1,1,0,0,1
5,0,1,0,1,0,0,0,1,0,1,1,0
6,1,0,0,1,0,0,0,1,0,1,0,1
7,0,0,1,0,0,1,1,0,1,0,1,0
8,0,0,1,1,0,0,0,1,1,0,0,1
9,0,1,0,0,0,1,0,1,1,0,0,1


# Doing the calculations

1. Calculate inverse covariance matrix for the base cases.


2. Get the case to evaluate.


3. Calculate mahalanobis distance using case, base and inverse covariance matrix.


4. Minimum distances calculated will be stored.


5. Minimum distance calculated index will be used to solve the problem, by using the index solution in base cases.


6. Append solution to the library, to use it in future cases, and store other relevant data (eg. covariance heat maps).


7. If there are more cases, it evaluates, getting the new base (one-hot) encoded.

* The **Mahalanobis distance** is a measure of the distance between a point P and a distribution D

In [None]:
#Moving through all cases
for i in range(problems.shape[0]):
    #Get the inverse covariance matrix for the base cases
    covariance_matrix = base.cov()
    inverse_covariance_matrix = np.linalg.pinv(covariance_matrix) #Inverse
    
    #Get case row to evaluate
    case_row = problems.loc[i,:]
    
    # Empty distances array to store mahalanobis distances obtained comparing each library cases
    distances = np.zeros(base.shape[0])

    # For each base cases rows
    for j in range(base.shape[0]):
        # Get base case row
        base_row = base.loc[j, :]

        # Calculate mahalanobis distance between case row and base cases, and store it
        distances[j] = distance.mahalanobis(case_row, base_row, inverse_covariance_matrix)

    # Returns the index (row) of the minimum value in distances calculated
    min_distance_row = np.argmin(distances)
    
    # Get solution based on index of found minimum distance, and append it to main library
    # From cases, append library 'similar' solution
    case = np.append(cases.iloc[i,:],library.iloc[min_distance_row,-1])
    
    print(f'>For case/problem {i}:',{cases.iloc[i,:].to_numpy()},'solution is {case[-1]}')
    
    #Store
    case = pd.Series(case,index=library.columns) #Case with solution
    library = library.append(case,ignore_index=True)
    
    # Save 'covariance heat map (biased)' output as file
    sns.heatmap(np.cov(base,bias=True),annot=True,fmt='g')
    plt.gcf().set_size_inches(12,6)
    plt.title(f'Covariance Heat Map #{i} \n Library cases stored {j} - Base to solve problem {i}')
    plt.savefig(f'covariance_heat_map_{i}.png',bbox_inches='tight')
    plt.close()
    
    #Reuse
    base = library.iloc[:,range(library.shape[1] - 1)]
    base = pd.get_dummies(base)

In [None]:
# Save library output as file
library.to_csv('D:\\SLIIT\\3rd year 2nd sem\\Machine Learning amd Optimization Methods\\Coding\\library_output.csv', index = False)

In [None]:
library