# MLEM2 Algorithm

In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [2]:
# Load the trip dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/trip_data.csv')

In [3]:
# Drop the explicit case-number column; later cells derive case numbers from
# the row index (index + 1). errors='ignore' keeps this cell re-runnable:
# without it a second execution raises KeyError because 'Case' is already gone.
df = df.drop(columns=['Case'], errors='ignore')

In [4]:
df

Unnamed: 0,Wind,Humidity,Temperature,Trip
0,4,low,medium,yes
1,8,low,low,yes
2,4,medium,medium,yes
3,8,medium,high,maybe
4,12,low,medium,maybe
5,16,high,low,no
6,30,high,high,no
7,12,high,high,no


#### Defining Goals

In [5]:
# The decision (concept) attribute is taken to be the last column of the table.
df_headers = df.columns.tolist()
concept = df_headers[-1]
concept

'Trip'

In [6]:
# Distinct decision values (one concept per value), in order of first appearance
concept_list = df[concept].unique()
concept_list

array(['yes', 'maybe', 'no'], dtype=object)

In [7]:
# For each concept, collect the 1-based case numbers (row index + 1) of the
# rows whose decision value equals that concept.
goal_list = [
    [position + 1 for position, decision in df[concept].items() if decision == item]
    for item in concept_list
]

In [8]:
goal_list

[[1, 2, 3], [4, 5], [6, 7, 8]]

#### Finding the numeric column

In [9]:
# Use pandas' own dtype check instead of comparing against "object": string
# columns are not guaranteed to be stored as object dtype, and other
# non-object dtypes (datetime, category) are not numeric either.
# Only the first numeric column is picked; IndexError if there is none.
numeric_col = [col for col in df.columns if is_numeric_dtype(df[col])][0]

In [10]:
numeric_col

'Wind'

In [11]:
# Sort the numeric attribute's values in ascending order (sort_values default).
sort_col = df[numeric_col].sort_values()

In [12]:
# .values drops the sorted Series' index, so the values are written back by
# position: 'sort_col' is NOT row-aligned with the other columns.
# NOTE(review): this also appends a helper column to df, which is why later
# cells slice attributes[:-2] — consider keeping it in a separate variable.
df['sort_col'] = sort_col.values

In [13]:
df

Unnamed: 0,Wind,Humidity,Temperature,Trip,sort_col
0,4,low,medium,yes,4
1,8,low,low,yes,4
2,4,medium,medium,yes,8
3,8,medium,high,maybe,8
4,12,low,medium,maybe,12
5,16,high,low,no,12
6,30,high,high,no,16
7,12,high,high,no,30


In [14]:
# Distinct values of the numeric attribute, still in ascending order because
# 'sort_col' was stored sorted.
point_list = df['sort_col'].unique()

In [15]:
point_list

array([ 4,  8, 12, 16, 30])

In [16]:
# Candidate cutpoints: the midpoint of each pair of neighbouring entries in
# point_list. int() drops any fractional part, so a midpoint of 6.5 is stored as 6.
avg_list = [
    int((lower + upper) / 2)
    for lower, upper in zip(point_list[:-1], point_list[1:])
]

In [17]:
avg_list

[6, 10, 14, 23]

In [18]:
# Column labels of df at this point; the helper column 'sort_col' is now last.
attributes = df.columns.tolist()
attributes

['Wind', 'Humidity', 'Temperature', 'Trip', 'sort_col']

In [19]:
# Every cutpoint yields two interval descriptors of the form "<attr>,<lo>..<hi>":
# first entry of point_list up to the cutpoint, then cutpoint up to the last entry.
case_list = [
    descriptor
    for cut in avg_list
    for descriptor in (
        f"{numeric_col},{point_list[0]}..{cut}",
        f"{numeric_col},{cut}..{point_list[-1]}",
    )
]

In [20]:
case_list

['Wind,4..6',
 'Wind,6..30',
 'Wind,4..10',
 'Wind,10..30',
 'Wind,4..14',
 'Wind,14..30',
 'Wind,4..23',
 'Wind,23..30']

In [21]:
# attributes[:-2] leaves out the last two columns (the decision column and the
# helper 'sort_col'); each distinct value of a symbolic attribute becomes one
# "<attr>,<value>" descriptor appended to case_list.
for column in attributes[:-2]:
    print(column)
    if is_numeric_dtype(df[column]):
        continue  # numeric columns are skipped in this cell
    case_list.extend(column + "," + value for value in df[column].unique())
        

Wind
Humidity
Temperature


In [22]:
case_list

['Wind,4..6',
 'Wind,6..30',
 'Wind,4..10',
 'Wind,10..30',
 'Wind,4..14',
 'Wind,14..30',
 'Wind,4..23',
 'Wind,23..30',
 'Humidity,low',
 'Humidity,medium',
 'Humidity,high',
 'Temperature,medium',
 'Temperature,low',
 'Temperature,high']

#### Building (a, v) pairs

In [23]:
# Build the block of every case descriptor: the 1-based numbers (row index + 1)
# of the rows whose attribute a equals value b, or lies inside the interval b.
att_val_list = []
for item in case_list:
    # Split on the first comma only, so a symbolic value may itself contain commas.
    a, b = item.split(",", 1)  # a = attribute and b = value
    column = df[a]
    if is_numeric_dtype(column):
        # Decide by the attribute's dtype rather than by searching for "." in
        # the value, which misfires on symbolic values such as "v1.0".
        # float() accepts integer and fractional bounds; parsed once per case.
        low, high = (float(bound) for bound in b.split(".."))
        selected = (column >= low) & (column <= high)  # both ends inclusive
    else:
        selected = column == b
    members = [index + 1 for index in column.index[selected.to_numpy()]]
    print(members)
    att_val_list.append(members)

[1, 3]
[2, 4, 5, 6, 7, 8]
[1, 2, 3, 4]
[5, 6, 7, 8]
[1, 2, 3, 4, 5, 8]
[6, 7]
[1, 2, 3, 4, 5, 6, 8]
[7]
[1, 2, 5]
[3, 4]
[6, 7, 8]
[1, 3, 5]
[2, 6]
[4, 7, 8]


In [24]:
att_val_list

[[1, 3],
 [2, 4, 5, 6, 7, 8],
 [1, 2, 3, 4],
 [5, 6, 7, 8],
 [1, 2, 3, 4, 5, 8],
 [6, 7],
 [1, 2, 3, 4, 5, 6, 8],
 [7],
 [1, 2, 5],
 [3, 4],
 [6, 7, 8],
 [1, 3, 5],
 [2, 6],
 [4, 7, 8]]

In [25]:
# Column-wise payload for the summary table: each case descriptor next to its list of case numbers
data = dict(Cases=case_list, att_val=att_val_list)

In [26]:
# One row per (a, v) case: 'Cases' holds the descriptor, 'att_val' the matching case numbers.
df2 = pd.DataFrame(data)

In [27]:
#Cases and corresponding att-value pairs
df2

Unnamed: 0,Cases,att_val
0,"Wind,4..6","[1, 3]"
1,"Wind,6..30","[2, 4, 5, 6, 7, 8]"
2,"Wind,4..10","[1, 2, 3, 4]"
3,"Wind,10..30","[5, 6, 7, 8]"
4,"Wind,4..14","[1, 2, 3, 4, 5, 8]"
5,"Wind,14..30","[6, 7]"
6,"Wind,4..23","[1, 2, 3, 4, 5, 6, 8]"
7,"Wind,23..30",[7]
8,"Humidity,low","[1, 2, 5]"
9,"Humidity,medium","[3, 4]"


#### Developing the MLEM2 Algorithm

In [28]:
goalIntersect = []
def mlemAlgo(goal):
    """Intersect every (a, v) block stored in ``df2`` with the goal.

    Parameters
    ----------
    goal : iterable
        Case numbers forming the current goal (e.g. one entry of ``goal_list``).

    Side effects
    ------------
    * The module-level list ``goalIntersect`` is refilled with one set per row
      of ``df2``: ``set(row['att_val']) & set(goal)``.
    * ``df2`` receives those sets in the column ``'goal_intersect'``.

    Results are rebuilt from scratch on every call, so the function can be run
    once per goal (or the calling cell re-run). Appending to the global list and
    always calling ``df2.insert`` would fail on the second call with a length
    mismatch / "already exists" ValueError.
    """
    goal_set = set(goal)
    # Slice-assign so the shared module-level list is refilled, not rebound.
    goalIntersect[:] = [set(block) & goal_set for block in df2['att_val']]

    if 'goal_intersect' in df2.columns:
        # Column left over from an earlier call: overwrite it in place.
        df2['goal_intersect'] = goalIntersect
    else:
        # First call: create the column at position 2, right after 'att_val'.
        df2.insert(2, 'goal_intersect', goalIntersect)

In [29]:
# Run against the first goal, i.e. the cases of the first concept in concept_list.
mlemAlgo(goal_list[0])

In [30]:
df2

Unnamed: 0,Cases,att_val,goal_intersect
0,"Wind,4..6","[1, 3]","{1, 3}"
1,"Wind,6..30","[2, 4, 5, 6, 7, 8]",{2}
2,"Wind,4..10","[1, 2, 3, 4]","{1, 2, 3}"
3,"Wind,10..30","[5, 6, 7, 8]",{}
4,"Wind,4..14","[1, 2, 3, 4, 5, 8]","{1, 2, 3}"
5,"Wind,14..30","[6, 7]",{}
6,"Wind,4..23","[1, 2, 3, 4, 5, 6, 8]","{1, 2, 3}"
7,"Wind,23..30",[7],{}
8,"Humidity,low","[1, 2, 5]","{1, 2}"
9,"Humidity,medium","[3, 4]",{3}
