In [51]:
import numpy as np
import pandas as pd

df = pd.read_csv("meeting-rooms.csv")

df

Unnamed: 0,row,room,capacity,date,nineToEleven,attendanceNineToEleven,elevenToOne,attendanceElevenToOne,oneToThree,attendanceOneToThree,threeToFive,attendanceThreeToFive
0,1,Pit-Lane,4,01/01/2024,True,2.0,True,3.0,False,,False,
1,2,Dry-lane,4,01/01/2024,True,4.0,False,,False,,False,
2,3,Joker Lap,4,01/01/2024,True,4.0,False,,True,4.0,False,
3,4,Quick 8,8,01/01/2024,True,7.0,False,,True,8.0,False,
4,5,Pole Position,20,01/01/2024,False,,True,14.0,True,15.0,True,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...
385,386,Dry-lane,4,29/03/2024,False,,True,3.0,False,,False,
386,387,Joker Lap,4,29/03/2024,False,,True,2.0,True,4.0,False,
387,388,Quick 8,8,29/03/2024,False,,True,6.0,True,5.0,True,7.0
388,389,Pole Position,20,29/03/2024,False,,True,10.0,True,17.0,True,18.0


In [52]:
features = ["row", "capacity", "attendanceNineToEleven", "attendanceElevenToOne", "attendanceOneToThree",
            "attendanceThreeToFive"]
target = ["nineToEleven", "elevenToOne", "oneToThree", "threeToFive"]
rooms = ["Pit-Lane", "Dry-lane", "Joker Lap", "Quick 8", "Pole Position", "Cockpit"]


def get_train_data_for_room(df, features, target, room):
    room_df = df.loc[df.room == room]
    X = room_df[features]
    y = room_df[target]

    return X, y

In [53]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, KFold

model_classifier = RandomForestClassifier(n_estimators=50)

# cv_scores = cross_val_score(model, X, y, cv=5)
# 
# print("Cross-validation scores: ", cv_scores)
# print("Mean accuracy:", cv_scores.mean())

In [54]:
def cross_validation(clf, X, y):
    k_fold = KFold(n_splits=5, shuffle=True)

    fold = 0
    accs = []

    for train_idx, val_idx in k_fold.split(X, y):
        X_tr = X.iloc[train_idx]
        y_tr = y.iloc[train_idx]

        X_tst = X.iloc[val_idx]
        y_tst = y.iloc[val_idx]

        clf.fit(X_tr, y_tr)
        pred = clf.predict(X_tst)
        acc_score = accuracy_score(y_tst, pred)
        print(f"Fold {fold}")
        print(f"Accuracy: {acc_score}")
        accs.append(acc_score)
        fold += 1

    print("Mean accuracy: ", np.mean(accs))
    
    return clf


def validate_for_each_room(clf, rooms):
    for room in rooms:
        X, y = get_train_data_for_room(df, features, target, room)
        print(f"Cross-validation for {room}")
        cross_validation(clf,X, y)


validate_for_each_room(model_classifier,rooms)

Cross-validation for Pit-Lane
Fold 0
Accuracy: 1.0
Fold 1
Accuracy: 0.9230769230769231
Fold 2
Accuracy: 1.0
Fold 3
Accuracy: 1.0
Fold 4
Accuracy: 1.0
Mean accuracy:  0.9846153846153847
Cross-validation for Dry-lane
Fold 0
Accuracy: 1.0
Fold 1
Accuracy: 0.8461538461538461
Fold 2
Accuracy: 1.0
Fold 3
Accuracy: 1.0
Fold 4
Accuracy: 1.0
Mean accuracy:  0.9692307692307693
Cross-validation for Joker Lap
Fold 0
Accuracy: 1.0
Fold 1
Accuracy: 1.0
Fold 2
Accuracy: 1.0
Fold 3
Accuracy: 1.0
Fold 4
Accuracy: 1.0
Mean accuracy:  1.0
Cross-validation for Quick 8
Fold 0
Accuracy: 1.0
Fold 1
Accuracy: 1.0
Fold 2
Accuracy: 1.0
Fold 3
Accuracy: 1.0
Fold 4
Accuracy: 1.0
Mean accuracy:  1.0
Cross-validation for Pole Position
Fold 0
Accuracy: 0.8461538461538461
Fold 1
Accuracy: 1.0
Fold 2
Accuracy: 1.0
Fold 3
Accuracy: 0.8461538461538461
Fold 4
Accuracy: 1.0
Mean accuracy:  0.9384615384615385
Cross-validation for Cockpit
Fold 0
Accuracy: 0.9230769230769231
Fold 1
Accuracy: 1.0
Fold 2
Accuracy: 1.0
Fold 3
A

In [55]:
desks = pd.read_csv("hackathon-schema.csv")
print(desks.head())
print(desks.shape)

   row             desk        date  firstHalf  secondHalf
0    1  CLUJ_5_beta_1.1  01/01/2024      False        True
1    2  CLUJ_5_beta_1.2  01/01/2024       True       False
2    3  CLUJ_5_beta_1.3  01/01/2024      False        True
3    4  CLUJ_5_beta_1.4  01/01/2024       True       False
4    5  CLUJ_5_beta_2.1  01/01/2024       True        True
(8775, 5)


In [56]:
print(len(desks)//4)
total_number_of_desk_groups = 33
number_of_groups_with_four_desks = 31
desks_group_19 = 6
desks_group_18 = 5
number_of_desks = number_of_groups_with_four_desks * 4
print(number_of_desks + desks_group_18 + desks_group_19)

2193
135


In [57]:
# CLUJ_5_beta_1.1 -> CLUJ_5_beta_33.4
format = "CLUJ_5_beta_"

first_desk = format +"1.1"
first_desk_df = desks.loc[desks.desk == first_desk]
first_desk_df

def split_date(df):
    df.loc["day","month","year"] = df["date"].str.split('/', expand=True)
    df = df.drop("date", axis=1)
    df = df.expand()
    return df

In [58]:
split_date(first_desk_df)

TypeError: loc must be int