In [9]:
import pandas as pd

# Load your dataset
data = pd.read_csv('../data/platinum/report_data.csv')

# Inspect the data
print(data.head())

# Ensure the 'lastUpdated' is in datetime format
data['lastUpdated'] = pd.to_datetime(data['lastUpdated'])

# Sort the data by employee and lastUpdated
data.sort_values(by=['employeeId', 'lastUpdated'], inplace=True)
data.columns

   employeeId  courseId  employeeName  designationId  designationName  \
0           2       331  Allison Chan              5    IT Specialist   
1           2       331  Allison Chan              5    IT Specialist   
2           2       119  Allison Chan              5    IT Specialist   
3           2       119  Allison Chan              5    IT Specialist   
4           3       367  Scott Flores              7  Project Manager   

                                     courseName    difficulty language  \
0  Programmable even-keeled process improvement  Intermediate  English   
1  Programmable even-keeled process improvement  Intermediate  English   
2                   Secured interactive product      Beginner    Tamil   
3                   Secured interactive product      Beginner    Tamil   
4                 Up-sized multimedia challenge      Advanced   German   

   totalTime  totalModules progressStatus              lastUpdated  \
0        480            10    in_progress  202

Index(['employeeId', 'courseId', 'employeeName', 'designationId',
       'designationName', 'courseName', 'difficulty', 'language', 'totalTime',
       'totalModules', 'progressStatus', 'lastUpdated', 'modulesCompleted',
       'userSkills', 'courseSkills', 'designationSkills'],
      dtype='object')

In [3]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Create a feature for progress percentage
data['progressPercentage'] = data['modulesCompleted'] / data['totalModules']

# Use MultiLabelBinarizer to encode skills
from sklearn.preprocessing import MultiLabelBinarizer

# Assuming skills are stored in a list-like format
mlb = MultiLabelBinarizer()
skills_encoded = mlb.fit_transform(data['userSkills'].str.split(','))

# Combine encoded skills with main DataFrame
skills_df = pd.DataFrame(skills_encoded, columns=mlb.classes_)
data = pd.concat([data, skills_df], axis=1)


In [16]:
def get_employee_data(employee_id):
    return data[data['employeeId'] == employee_id]

employee_id = 2  # Replace with the actual employee ID
employee_data = get_employee_data(employee_id)

In [6]:
completed_courses = employee_data[employee_data['progressStatus'] == 'completed']
print(completed_courses[['courseName', 'progressPercentage', 'userSkills']])

Empty DataFrame
Columns: [courseName, progressPercentage, userSkills]
Index: []


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Assume 'data' is your DataFrame

# Prepare features and target variable
X = data.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)
y = data['progressStatus'].apply(lambda x: 1 if x == 'completed' else 0)  # Binary target

# Identify categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(exclude=['object']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),  # Keep numerical columns as is
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)  # Handle unknown categories
    ]
)
# Create a pipeline that first transforms the data then fits the model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier())
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# To make predictions, use:
# predictions = pipeline.predict(X_test)


In [18]:
def recommend_courses(employee_id):
    employee_data = get_employee_data(employee_id)
    completed_courses = set(employee_data['courseId'])

    # Create a DataFrame of all courses not completed by the employee
    all_courses = data[~data['courseId'].isin(completed_courses)]

    # Check if there are any courses to recommend
    if all_courses.empty:
        return pd.DataFrame(columns=['courseName', 'difficulty', 'totalTime', 'courseSkills'])

    # Prepare features for prediction
    features = all_courses.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)

    # Predict which courses are likely to be completed using the pipeline
    predictions = pipeline.predict(features)  # Use the pipeline you defined earlier
    all_courses['predicted_completion'] = predictions

    # Recommend courses with predicted completion = 1
    recommended_courses = all_courses[all_courses['predicted_completion'] == 1]

    return recommended_courses[['courseName', 'difficulty', 'totalTime', 'courseSkills']]

# Get recommendations for the given employee
recommended = recommend_courses(3)
print(recommended)


                                             courseName    difficulty  \
1          Programmable even-keeled process improvement  Intermediate   
3                           Secured interactive product      Beginner   
12            Pre-emptive zero-defect open architecture      Advanced   
20               Distributed leadingedge superstructure      Advanced   
35                        Fundamental impactful project      Beginner   
...                                                 ...           ...   
2379                Intuitive leadingedge orchestration      Beginner   
2381            Implemented maximized Internet solution      Advanced   
2386                Universal 6thgeneration application  Intermediate   
2392      Reverse-engineered grid-enabled knowledgebase  Intermediate   
2390  Enterprise-wide solution-oriented customer loy...      Advanced   

      totalTime                                       courseSkills  
1           480  ['MBAL', 'Press Releases', 'FCoE', 'P

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_courses['predicted_completion'] = predictions


In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [33]:
# Assume 'data' is your DataFrame

# Prepare features and target variable
X = data.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)
y = data['progressStatus'].apply(lambda x: 1 if x == 'completed' else 0)  # Binary target

# Identify categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(exclude=['object']).columns.tolist()


0       0
2       0
1       1
3       1
4       0
       ..
2392    1
2387    0
2388    0
2389    0
2390    1
Name: progressStatus, Length: 2393, dtype: int64

In [34]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),  # Keep numerical columns as is
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)  # Handle unknown categories
    ]
)

# Create a pipeline that first transforms the data then fits the model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier())
])


In [47]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)


In [48]:
# Make predictions on the test set
y_pred = pipeline.predict(X_test)


In [49]:
# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)

# Print the metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(confusion)


Accuracy: 0.65
Precision: 0.19
Recall: 0.11
F1 Score: 0.14
Confusion Matrix:
[[444  84]
 [170  20]]


In [25]:
def recommend_courses(employee_id):
    employee_data = get_employee_data(employee_id)
    completed_courses = set(employee_data['courseId'])

    # Create a DataFrame of all courses not completed by the employee
    all_courses = data[~data['courseId'].isin(completed_courses)]

    # Check if there are any courses to recommend
    if all_courses.empty:
        return pd.DataFrame(columns=['courseName', 'difficulty', 'totalTime', 'courseSkills'])

    # Prepare features for prediction
    features = all_courses.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)

    # Predict which courses are likely to be completed using the pipeline
    predictions = pipeline.predict(features)
    all_courses['predicted_completion'] = predictions

    # Recommend courses with predicted completion = 1
    recommended_courses = all_courses[all_courses['predicted_completion'] == 1]

    return recommended_courses[['courseName', 'difficulty', 'totalTime', 'courseSkills']]


In [83]:
# Get recommendations for the given employee
recommended = recommend_courses(120)
print(recommended)


                                             courseName    difficulty  \
1          Programmable even-keeled process improvement  Intermediate   
3                           Secured interactive product      Beginner   
5                         Up-sized multimedia challenge      Advanced   
9                                Cloned radical project      Advanced   
12            Pre-emptive zero-defect open architecture      Advanced   
...                                                 ...           ...   
2381            Implemented maximized Internet solution      Advanced   
2383               Stand-alone eco-centric installation  Intermediate   
2386                Universal 6thgeneration application  Intermediate   
2392      Reverse-engineered grid-enabled knowledgebase  Intermediate   
2390  Enterprise-wide solution-oriented customer loy...      Advanced   

      totalTime                                       courseSkills  
1           480  ['MBAL', 'Press Releases', 'FCoE', 'P

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_courses['predicted_completion'] = predictions


In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import VotingClassifier

# Prepare features and target variable
X = data.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)
y = data['progressStatus'].apply(lambda x: 1 if x == 'completed' else 0)

categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(exclude=['object']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

# Create classifiers
rf_classifier = RandomForestClassifier(random_state=42)
gb_classifier = GradientBoostingClassifier(random_state=42)

# Create an ensemble model
voting_classifier = VotingClassifier(estimators=[
    ('rf', rf_classifier),
    ('gb', gb_classifier)
], voting='soft')

# Create a pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', voting_classifier)
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Hyperparameter tuning
param_grid = {
    'classifier__rf__n_estimators': [50, 100],
    'classifier__rf__max_depth': [None, 10, 20],
    'classifier__gb__n_estimators': [50, 100],
    'classifier__gb__learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='f1')
grid_search.fit(X_resampled, y_resampled)

best_pipeline = grid_search.best_estimator_

# Cross-validation for better estimation
cv_scores = cross_val_score(best_pipeline, X, y, cv=5, scoring='f1')
print(f"Average F1 Score (CV): {cv_scores.mean():.2f}")

# Make predictions on the test set
y_pred = best_pipeline.predict(X_test)

# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)

# Print the metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(confusion)

def recommend_courses(employee_id):
    employee_data = get_employee_data(employee_id)
    completed_courses = set(employee_data['courseId'])

    all_courses = data[~data['courseId'].isin(completed_courses)]
    if all_courses.empty:
        return pd.DataFrame(columns=['courseName', 'difficulty', 'totalTime', 'courseSkills'])

    features = all_courses.drop(['employeeId', 'courseId', 'employeeName', 'courseName', 'progressStatus', 'lastUpdated'], axis=1)
    predictions = best_pipeline.predict(features)
    all_courses['predicted_completion'] = predictions

    recommended_courses = all_courses[all_courses['predicted_completion'] == 1]
    return recommended_courses[['courseName', 'difficulty', 'totalTime', 'courseSkills']]

# Get recommendations for the given employee
recommended = recommend_courses(3)
print(recommended)


ModuleNotFoundError: No module named 'imblearn'

In [90]:
import pandas as pd
import ast  # For safely evaluating string representations of lists
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

# Load your dataset
data = pd.read_csv('../data/platinum/report_data.csv')

# Ensure the 'lastUpdated' is in datetime format
data['lastUpdated'] = pd.to_datetime(data['lastUpdated'])

# Sort the data by employee and lastUpdated
data.sort_values(by=['employeeId', 'lastUpdated'], inplace=True)

# Extract employee skills from the 'userSkills' column
def get_employee_skills(data):
    employee_skills = {}
    for index, row in data.iterrows():
        emp_id = row['employeeId']
        skills = ast.literal_eval(row['userSkills'])  # Safely evaluate the string representation of lists
        if emp_id not in employee_skills:
            employee_skills[emp_id] = set(skills)  # Store as a set for uniqueness
        else:
            employee_skills[emp_id].update(skills)  # Update existing set with new skills
    return employee_skills

# Get employee skills
employee_skills = get_employee_skills(data)

# Feature Engineering
def create_features(df):
    df['userSkillCount'] = df['userSkills'].apply(lambda x: len(ast.literal_eval(x)))  # Count unique skills
    df['courseSkillCount'] = df['courseSkills'].apply(lambda x: len(ast.literal_eval(x)))
    df['designationSkillCount'] = df['designationSkills'].apply(lambda x: len(ast.literal_eval(x)))
    df['progress'] = df['progressStatus']  # Target variable
    return df

data = create_features(data)

# Selecting features and target
X = data[['userSkillCount', 'courseSkillCount', 'designationSkillCount', 'totalTime', 'totalModules']]
y = data['progress']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model pipeline
model = RandomForestClassifier(random_state=42)

# Training the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Performance metrics
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

# Results
print(f'Accuracy: {accuracy:.2f}')
print(f'F1 Score: {f1:.2f}')

# Recommendation Function
def recommend_courses(employee_id, employee_skills, data, top_n=3):
    employee_data = data[data['employeeId'] == employee_id]
    
    # Extracting the relevant employee skills
    employee_skill_set = employee_skills.get(employee_id, set())
    
    # Scoring courses based on skill match
    course_scores = []
    for index, row in data.iterrows():
        course_skill_set = set(ast.literal_eval(row['courseSkills']))
        match_score = len(employee_skill_set.intersection(course_skill_set))
        course_scores.append((row['courseId'], row['courseName'], match_score))
    
    # Sort courses by match score and select the top N
    recommended_courses = sorted(course_scores, key=lambda x: x[2], reverse=True)[:top_n]
    
    return recommended_courses

# Example usage of the recommendation function
emp_id = 2 # Replace with the employee ID you want to get recommendations for
recommended_courses = recommend_courses(emp_id, employee_skills, data)

# Display recommended courses
print("Recommended Courses:")
for course in recommended_courses:
    print(f"Course ID: {course[0]}, Course Name: {course[1]}, Skill Match Score: {course[2]}")


Accuracy: 0.59
F1 Score: 0.54
Recommended Courses:
Course ID: 331, Course Name: Programmable even-keeled process improvement, Skill Match Score: 4
Course ID: 331, Course Name: Programmable even-keeled process improvement, Skill Match Score: 4
Course ID: 119, Course Name: Secured interactive product, Skill Match Score: 3


In [98]:
import pandas as pd
import ast  # For safely evaluating string representations of lists
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

# Load your dataset
data = pd.read_csv('../data/platinum/report_data.csv')


# Ensure the 'lastUpdated' is in datetime format
data['lastUpdated'] = pd.to_datetime(data['lastUpdated'])

# Sort the data by employee and lastUpdated
data.sort_values(by=['employeeId', 'lastUpdated'], inplace=True)

# Extract employee skills from the 'userSkills' column
def get_employee_skills(data):
    employee_skills = {}
    for index, row in data.iterrows():
        emp_id = row['employeeId']
        skills = ast.literal_eval(row['userSkills'])  # Safely evaluate the string representation of lists
        if emp_id not in employee_skills:
            employee_skills[emp_id] = set(skills)  # Store as a set for uniqueness
        else:
            employee_skills[emp_id].update(skills)  # Update existing set with new skills
    return employee_skills

# Get employee skills
employee_skills = get_employee_skills(data)

# Feature Engineering
def create_features(df):
    df['userSkillCount'] = df['userSkills'].apply(lambda x: len(ast.literal_eval(x)))  # Count unique skills
    df['courseSkillCount'] = df['courseSkills'].apply(lambda x: len(ast.literal_eval(x)))
    df['designationSkillCount'] = df['designationSkills'].apply(lambda x: len(ast.literal_eval(x)))
    df['progress'] = df['progressStatus']  # Target variable
    return df

data = create_features(data)

# Selecting features and target
X = data[['userSkillCount', 'courseSkillCount', 'designationSkillCount', 'totalTime', 'totalModules']]
y = data['progress']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model pipeline
model = RandomForestClassifier(random_state=42)

# Training the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Performance metrics
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

# Results
print(f'Accuracy: {accuracy:.2f}')
print(f'F1 Score: {f1:.2f}')

# Recommendation Function with Filter for Completed Progress
def recommend_courses(employee_id, employee_skills, data, model, top_n=5):
    # Extracting the relevant employee skills
    employee_skill_set = employee_skills.get(employee_id, set())
    
    # Scoring courses based on skill match and model predictions
    course_scores = []
    for index, row in data.iterrows():
        course_skill_set = set(ast.literal_eval(row['courseSkills']))
        
        # Skill match score
        match_score = len(employee_skill_set.intersection(course_skill_set))
        
        # Create a feature array for prediction
        feature_vector = [[
            len(employee_skill_set),         # userSkillCount
            len(course_skill_set),           # courseSkillCount
            len(ast.literal_eval(row['designationSkills'])),  # designationSkillCount
            row['totalTime'],                 # totalTime
            row['totalModules']               # totalModules
        ]]
        
        # Predict progress status for this course
        predicted_progress = model.predict(feature_vector)[0]
        
        # Only consider courses predicted to have progress as 'Completed'
        if predicted_progress == 3:  # Assuming '3' indicates 'Completed'
            course_scores.append((row['courseId'], row['courseName'], match_score))
    
    # Sort courses by skill match score and select the top N
    recommended_courses = sorted(course_scores, key=lambda x: x[2], reverse=True)[:top_n]
    
    return recommended_courses

# Example usage of the recommendation function
emp_id = 1  # Replace with the employee ID you want to get recommendations for
recommended_courses = recommend_courses(emp_id, employee_skills, data, model)

# Display recommended courses
print("Recommended Courses (Predicted as Completed):")
for course in recommended_courses:
    print(f"Course ID: {course[0]}, Course Name: {course[1]}, Skill Match Score: {course[2]}")


Accuracy: 0.59
F1 Score: 0.54




Recommended Courses (Predicted as Completed):




In [99]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load your dataset
data = pd.read_csv('../data/platinum/report_data.csv')

# Ensure the 'lastUpdated' is in datetime format
data['lastUpdated'] = pd.to_datetime(data['lastUpdated'])

# Combine userSkills and courseSkills for better representation
data['skills_combined'] = data['userSkills'] + ' ' + data['courseSkills']

# Create a TF-IDF vectorizer
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(data['skills_combined'])

# Calculate cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get recommendations
def get_recommendations(employee_id, data, cosine_sim, top_n=5):
    idx = data[data['employeeId'] == employee_id].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n + 1]
    course_indices = [i[0] for i in sim_scores]
    return data.iloc[course_indices]

# Evaluate using cross-validation
def cross_validate_recommendations(data, num_iterations=5, top_n=5):
    all_recommendations = []
    for _ in range(num_iterations):
        # Split the data
        train_data, test_data = train_test_split(data, test_size=0.2)
        
        # Create TF-IDF and cosine similarity for the train set
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(train_data['skills_combined'])
        cosine_sim = cosine_similarity(tfidf_matrix)
        
        # Generate recommendations for test set employees
        for employee_id in test_data['employeeId'].unique():
            recommendations = get_recommendations(employee_id, train_data, cosine_sim, top_n)
            all_recommendations.append(recommendations['courseName'].tolist())
    
    return all_recommendations

# Generate recommendations and evaluate
recommendations = cross_validate_recommendations(data)
print("Sample Recommendations from Cross-Validation:\n", recommendations)

# Here you could analyze the recommendations for diversity, popularity, or manual checks


   employeeId  courseId  employeeName  designationId  designationName  \
0           2       331  Allison Chan              5    IT Specialist   
1           2       331  Allison Chan              5    IT Specialist   
2           2       119  Allison Chan              5    IT Specialist   
3           2       119  Allison Chan              5    IT Specialist   
4           3       367  Scott Flores              7  Project Manager   

                                     courseName    difficulty language  \
0  Programmable even-keeled process improvement  Intermediate  English   
1  Programmable even-keeled process improvement  Intermediate  English   
2                   Secured interactive product      Beginner    Tamil   
3                   Secured interactive product      Beginner    Tamil   
4                 Up-sized multimedia challenge      Advanced   German   

   totalTime  totalModules progressStatus              lastUpdated  \
0        480            10    in_progress  202

IndexError: index 0 is out of bounds for axis 0 with size 0