# Predictor Analysis

Code authored by: Shaw Talebi

### imports

In [1]:
from IPython.display import display, Markdown
import pandas as pd
from utils import load_clean_data, send_openai_request, df_to_markdown

### functions

In [2]:
def create_kpi_summary(df, kpi_list, category_column):
    """
    Create a summary table of KPIs by category.

    For every distinct value in ``category_column`` the table holds the number
    of rows in that category, its share of all rows, and - for each KPI - the
    sum of the KPI column within the category together with that sum expressed
    as a percentage of the category's row count.

    Args:
        df (pandas.DataFrame): Input dataframe
        kpi_list (list): List of KPI column names to analyze. Each column is
            summed, so the "true count" is only a count when the column holds
            booleans or 0/1 flags.
        category_column (str): Column name to group by

    Returns:
        pandas.DataFrame: One row per category (in order of first appearance)
        with columns ``category_column``, ``count``, ``percent`` and, per KPI,
        ``<kpi>_true_count`` and ``<kpi>_percentage``. Percentages are rounded
        to one decimal place.
    """
    summary_list = []
    total_rows = len(df)

    for category in df[category_column].unique():
        # Missing values never compare equal to anything (NaN != NaN), so an
        # equality mask would select zero rows for them; match them explicitly.
        if pd.isna(category):
            category_mask = df[category_column].isna()
        else:
            category_mask = df[category_column] == category
        category_data = df[category_mask]

        total_count = len(category_data)
        percentage = total_count / total_rows * 100

        # count/percent do not depend on the KPI, so they are set once here.
        # This also keeps them in the output when kpi_list is empty.
        row = {
            category_column: category,
            'count': total_count,
            'percent': round(percentage, 1),
        }

        # For each KPI, record the sum within the category and its share of
        # the category's rows.
        for kpi in kpi_list:
            kpi_true_count = category_data[kpi].sum()
            kpi_percentage = (kpi_true_count / total_count * 100) if total_count > 0 else 0

            row[f'{kpi}_true_count'] = kpi_true_count
            row[f'{kpi}_percentage'] = round(kpi_percentage, 1)

        summary_list.append(row)

    return pd.DataFrame(summary_list)

In [3]:
def format_join_reason_cluster(df, cluster_num):
    """
    Render the join-reason responses of one cluster as a markdown table.

    Args:
        df (pandas.DataFrame): Frame with the columns
            ``course_join_question_cluster``, ``course_join_question``,
            ``course_join_question_summarized`` and ``job_category``.
        cluster_num: Value of ``course_join_question_cluster`` to select.

    Returns:
        str: A leading newline, a ``### Cluster <n>:`` heading, the table
        header, and one table row per matching dataframe row.
    """
    def _as_cell(value):
        # Pipes would end the table cell and newlines would end the table
        # row, so escape the former and flatten the latter.
        return str(value).replace('|', '\\|').replace('\n', ' ')

    cluster_rows = df[df['course_join_question_cluster'] == cluster_num]

    pieces = [
        f"### Cluster {cluster_num}:\n",
        "| Join Reason Response | Summarized Response | Job Category |\n",
        "|---------------------|-------------------|--------------|\n",
    ]

    for _, record in cluster_rows.iterrows():
        reason_cell = _as_cell(record['course_join_question'])
        summary_cell = _as_cell(record['course_join_question_summarized'])
        job_cell = _as_cell(record['job_category'])
        pieces.append(f'| {reason_cell} | {summary_cell} | {job_cell} |\n')

    return "\n" + "".join(pieces)

### load data

In [4]:
# Load the "students_activity_reviews" dataset through the project's
# load_clean_data helper; the analysis cells below all read from this `df`.
# NOTE(review): presumably one row per student - confirm in utils.load_clean_data.
df = load_clean_data("students_activity_reviews")

### analysis

In [6]:
# Outcome columns summarized per segment in the tables below.
# create_kpi_summary sums each column, so they are expected to hold
# boolean / 0-1 flags.
kpi_list = ['projects_submitted_gte_3', 'rating_exists', 'rating_gte_10']

#### KPIs by role

In [7]:
# KPI breakdown for each distinct value of `job_category`.
role_summary = create_kpi_summary(df, kpi_list, 'job_category')
role_summary

Unnamed: 0,job_category,count,percent,projects_submitted_gte_3_true_count,projects_submitted_gte_3_percentage,rating_exists_true_count,rating_exists_percentage,rating_gte_10_true_count,rating_gte_10_percentage
0,entrepreneur,18,19.6,8,44.4,10,55.6,8,44.4
1,unknown,10,10.9,0,0.0,2,20.0,1,10.0
2,ic,34,37.0,5,14.7,9,26.5,4,11.8
3,leader,15,16.3,3,20.0,8,53.3,8,53.3
4,manager,14,15.2,3,21.4,5,35.7,4,28.6
5,student,1,1.1,0,0.0,1,100.0,1,100.0


#### Company size by role

In [8]:
# Collect the integer-typed columns whose name contains "company_size".
# The dtype is checked with pandas' is_integer_dtype rather than `== int`:
# `int` maps to a NumPy integer whose width can depend on the platform and
# NumPy version, so the equality test could silently skip integer columns
# stored with a different width (e.g. int32 or uint8 flags).
company_size_list = [
    column_name
    for column_name in df.columns
    if "company_size" in column_name and pd.api.types.is_integer_dtype(df[column_name])
]
company_size_list

['company_size_category_enterprise',
 'company_size_category_smb',
 'company_size_category_solo',
 'company_size_category_unknown']

In [9]:
# Reuse the KPI summary helper on the company-size columns collected above,
# giving the per-column sums and percentages within each job category.
role_size_summary = create_kpi_summary(df, company_size_list, 'job_category')
role_size_summary

Unnamed: 0,job_category,count,percent,company_size_category_enterprise_true_count,company_size_category_enterprise_percentage,company_size_category_smb_true_count,company_size_category_smb_percentage,company_size_category_solo_true_count,company_size_category_solo_percentage,company_size_category_unknown_true_count,company_size_category_unknown_percentage
0,entrepreneur,18,19.6,0,0.0,3,16.7,15,83.3,0,0.0
1,unknown,10,10.9,1,10.0,0,0.0,0,0.0,9,90.0
2,ic,34,37.0,21,61.8,9,26.5,0,0.0,4,11.8
3,leader,15,16.3,6,40.0,8,53.3,0,0.0,1,6.7
4,manager,14,15.2,7,50.0,4,28.6,0,0.0,3,21.4
5,student,1,1.1,1,100.0,0,0.0,0,0.0,0,0.0


#### KPIs by source

In [10]:
# KPI breakdown for each distinct value of `source`.
source_summary = create_kpi_summary(df, kpi_list, 'source')
source_summary

Unnamed: 0,source,count,percent,projects_submitted_gte_3_true_count,projects_submitted_gte_3_percentage,rating_exists_true_count,rating_exists_percentage,rating_gte_10_true_count,rating_gte_10_percentage
0,Waitlist,24,26.1,3,12.5,8,33.3,5,20.8
1,Started payment,52,56.5,13,25.0,22,42.3,17,32.7
2,Lightning Lesson,11,12.0,2,18.2,3,27.3,2,18.2
3,Enrolled (Free),2,2.2,0,0.0,0,0.0,0,0.0
4,Uploaded to waitlist,3,3.3,1,33.3,2,66.7,2,66.7


#### KPIs by join reason cluster

In [11]:
# KPI breakdown per join-reason cluster. Stored under its own name so the
# by-source table held in `source_summary` is not overwritten.
cluster_summary = create_kpi_summary(df, kpi_list, 'course_join_question_cluster')
# Sorted by cluster number for display only; `cluster_summary` itself keeps
# the order of first appearance.
cluster_summary.sort_values('course_join_question_cluster')

Unnamed: 0,course_join_question_cluster,count,percent,projects_submitted_gte_3_true_count,projects_submitted_gte_3_percentage,rating_exists_true_count,rating_exists_percentage,rating_gte_10_true_count,rating_gte_10_percentage
2,1,17,18.5,3,17.6,4,23.5,4,23.5
4,2,2,2.2,0,0.0,0,0.0,0,0.0
1,3,20,21.7,6,30.0,7,35.0,5,25.0
3,4,12,13.0,4,33.3,8,66.7,5,41.7
0,5,41,44.6,6,14.6,16,39.0,12,29.3


### review join reason clusters

In [12]:
# Order in which the clusters are rendered (cluster 4 comes first).
cluster_num_list = [4, 1, 2, 3, 5]

# Build one markdown table per cluster, then stitch them into one document.
cluster_sections = []
for cluster_num in cluster_num_list:
    cluster_sections.append(format_join_reason_cluster(df, cluster_num))
user_input = "".join(cluster_sections)

display(Markdown(user_input))


### Cluster 4:
| Join Reason Response | Summarized Response | Job Category |
|---------------------|-------------------|--------------|
| A methodology to use this technology to build high ROI systems | Goal: Learn to build high ROI systems using this technology | ic |
| getting more out of generative AI and LLM applications | Goal: Improve understanding and application of generative AI and LLMs | ic |
| Learn how to create AI systems (LLMs and RAGs) that use my own data sets (incl the tokenization of those data sets). I want to understand how to host them (locally or in the cloud) and have a better idea about which open-source models to choose from.  | Goal: Learn to create, host, and choose open-source AI models using personal data | ic |
| I'm a full time physics and ICT teacher trying to change careers into consulting as a data scientist/analyst. I would really like to be able to make at least a simple generative AI app/tool by myself.   | Goal: Transition to data science and create a generative AI app/tool | ic |
| Practical ramp-up the learning curve for building AI applications | Goal: Accelerate learning to build AI applications | entrepreneur |
| learn to build RAG based applications and learn fine-tuning of models | Goal: Learn to build RAG applications and fine-tune models | leader |
| Ability to build effective real world solutions with GenAI/ LLMs  | Goal: Build real-world solutions using GenAI and LLMs | leader |
| I am on a journey to become a Citizen Data Scientist. I want to build a solid foundation in maximizing the value of data using modern techniques.  | Goal: Become a Citizen Data Scientist and master data value techniques | leader |
| Learn how to train with a dataset and predictive analysis, and build a app. | Goal: Learn dataset training, predictive analysis, and app building | entrepreneur |
| good fundamental understanding of building practical applications | Goal: Gain fundamental understanding to build practical applications | leader |
| Experience setting up my own environment and how to compare it to my companies existing machine learning analytics environment using XGBoost | Goal: Learn environment setup and compare with company's ML analytics using XGBoost | ic |
| Build a good foundation for AI models and apps | Goal: Build a foundation for AI models and applications | leader |

### Cluster 1:
| Join Reason Response | Summarized Response | Job Category |
|---------------------|-------------------|--------------|
| Gain hands-on experience interfacing with LLMs, networking | Goal: Gain hands-on experience with LLMs and networking | ic |
| I am a data scientist but have limited experience with LLM application. I want to get more experience through real world projects and I want to learn from Shaw's experience creating valuable AI products.  | Goal: Gain LLM experience through projects and learn from Shaw's AI product expertise | ic |
| A structured way to build more advanced apps and flows out of my norm Improve my skills to negotiate increased salaries | Goal: Build advanced apps and improve negotiation skills | ic |
| Practical experience of playing with LLM! | Goal: Gain practical experience with LLMs | manager |
| Learn how to train and apply llm in real use cases like ingest legacy code and create a llm that can suggest recoding or triage quickly former legacy code | Goal: Learn to train and apply LLMs for legacy code analysis and suggestions | manager |
| Practical hands on experience of building LLM tools | Goal: Gain practical hands-on experience building LLM tools | manager |
| Learn basics and want to know what all fits where in the game .  I am not a coder and won’t code .  But from product management perspective I want to know what all buzz words are and where do they fit ?  Like RAG .. hugging face and all other industry trends  I keep hearing but I don’t know what exactly to use them and where to use them -  what is hugging face and who are their competitions and why to use them ?  Similarly RAG etc etc etc  | Goal: Understand industry trends, buzzwords, and their applications in product management | entrepreneur |
| Getting skills (for my resume) | Goal: Gain skills to enhance resume | ic |
| Advancement in career | Goal: Career advancement | ic |
| Broaden my skill set as a software engineer | Goal: Broaden software engineering skills | ic |
| Knowledge on LLMs and Prompt Engineering.  | Goal: Gain knowledge on LLMs and Prompt Engineering | ic |
| How to fine tune LLM effectively, and what LLM models I should be using for projects. | Goal: Learn to fine-tune LLMs and select suitable models for projects | entrepreneur |
| Learn to create my own LLM | Goal: Learn to create my own LLM | entrepreneur |
| Hands on learning | Goal: Hands-on learning experience | unknown |
| Hands on skills  | Goal: Gain hands-on skills | leader |
| Hands-on experience with building with LLMs that I can implement into the AI-powered accounting platform I'm working on | Goal: Gain hands-on LLM experience for AI accounting platform | entrepreneur |
| practical knowledge | Goal: Gain practical knowledge | manager |

### Cluster 2:
| Join Reason Response | Summarized Response | Job Category |
|---------------------|-------------------|--------------|
| No response provided | No response provided | ic |
| n | No response provided | ic |

### Cluster 3:
| Join Reason Response | Summarized Response | Job Category |
|---------------------|-------------------|--------------|
| I would like to start to have a grasp of how I can use AI to build products. | Goal: Learn to use AI for building products | ic |
| Two things: (1) Build AI applications to streamline my work, primarily for corporate finance / bizops use case. (2) Ship a prototype for software ideas that's been sitting in the backburner. | Goal: Build AI for finance/bizops and develop a prototype for software ideas | leader |
| Being able to build end to end AI applications. | Goal: Build end-to-end AI applications | unknown |
| Gain knowledge and skills to be able to develop AI powered automation application for regulatory compliance tasks such as RAG, Knowledge Graph, Langchain, etc.. | Goal: Learn to develop AI automation for regulatory compliance tasks | entrepreneur |
| Upskilling in for AI, a better understanding of building prototypes and products. | Goal: Upskilling in AI, prototype and product development | manager |
| Basic understanding of AI workflows.  I work with customers on presenting and selling technology on a corporate scale (Sales Solutions Architect), so am looking to get an understanding of the selection/discovery/build and operation process that's involved with selecting and building an AI enabled application (Sales Solutions Architect) | Goal: Understand AI workflows for sales solutions and application development | ic |
| Better understanding and skill set in AI to make my resume stand out in data science | Goal: Improve AI skills to enhance data science resume. | ic |
| Implement AI to help small businesses grow and be more efficient. | Goal: Use AI to help small businesses grow and improve efficiency | entrepreneur |
| Build AI tools | Goal: Build AI tools | ic |
| Get to be fully hands on in the weeds to run ML models, large scale data pipelines and become an AI expert to be able to build AI agents | Goal: Become an AI expert by mastering ML models, data pipelines, and building AI agents | ic |
| Become an expert at AI | Goal: Become an AI expert | manager |
| Ability to build AI use cases for various world problems including in Transportation | Goal: Build AI use cases for global problems, including transportation | unknown |
| How to use modern AI tools for freelance | Goal: Learn to use AI tools for freelancing | leader |
| Become AI enabled | Goal: Become AI enabled | unknown |
| Build an AI product | Goal: Build an AI product | entrepreneur |
| I want to build A.I. agents to make my job easier  | Goal: Build AI agents to simplify work tasks | entrepreneur |
| Build AI solutions  | Goal: Build AI solutions | ic |
| Build more with AI | Goal: Build more with AI | ic |
| Feel comfortable building various kinds of AI projects. | Goal: Build confidence in creating various AI projects | manager |
| how to start my own ai business | Goal: Learn how to start an AI business | entrepreneur |

### Cluster 5:
| Join Reason Response | Summarized Response | Job Category |
|---------------------|-------------------|--------------|
| I would like to get practical experience in building and deploying ai applications. I have some grounding the basics, but I want to be able to get to the point of actually deploying apps.  | Goal: Gain practical experience in building and deploying AI applications | entrepreneur |
| New to the field, I will learn as much as I can so I can grow and buikd a career in AI | Goal: Learn and grow a career in AI. | unknown |
| Learn more about AI customization. Building my own program using different tools . Learning also basics and fundamentals of AI models and in which context to use them . | Goal: Learn AI customization, build programs, and understand AI fundamentals | unknown |
| Uplevel my knowledge on AI and ideally be able to build something using AI. | Goal: Improve AI knowledge and build AI projects | leader |
| I would like to switch over to AI Product Management from Operational Program Management. I got laid-off in Oct'24. Even since, I tried learning, did a lot of exercise of my own. I'm not looking for new opportunity till I'm ready for AI. Here are my goals - Real time and Practicle AI Product Implementation - Real time build Product Strategy - Real time do Product Design  Rest are covered in the course, I shall try to pick them to fulfill my goals.  Shaw, I'm excited to learn from you!  Thank you | Goal: Transition to AI Product Management, learn real-time AI product development and strategy | manager |
| AI basics, introduction and how to use it my work  | Goal: Learn AI basics and usage for work | manager |
| I want to gain practical knowledge of end to end AI process through building projects. | Goal: Gain practical knowledge of end-to-end AI through projects | ic |
| Understand and create projects using AI where most appropriate | Goal: Learn to create AI projects where suitable | ic |
| A better understanding of AI and how to bring it into the Trackstack platform | Goal: Understand AI and integrate it into Trackstack platform | ic |
| - Technical skills: learn to use LLMs/ AI - Side projects: build up portfolio of side projects - Community: network with other data scientists/ software engineers  | Goal: Learn AI skills, build projects, and network with professionals | entrepreneur |
| Hands on experience in learning AI that will help explore opportunities on how we can integrate AI in building solutions as a Software Engineer. | Goal: Gain hands-on AI experience to integrate AI into software solutions | ic |
| I would like to understand AI, how to create LLM and some of the mathematics behind AI. | Goal: Understand AI, create LLMs, and learn AI mathematics | ic |
| Knowledge to build AI applications. | Goal: Gain knowledge to build AI applications | student |
| Bridge any knowledge gaps and improve my understanding of building w/ AI | Goal: Bridge knowledge gaps and improve understanding of building with AI | manager |
| Developer-level understanding of Gen-AI and related tech. | Goal: Achieve developer-level understanding of Gen-AI and related tech | ic |
| Learn about AI and automating AI tasks with python.  | Goal: Learn AI and automate AI tasks with Python | manager |
| To learn practical AI skills that I can apply to real life use-cases. | Goal: Learn practical AI skills for real-life applications | leader |
| I would like to deepen my understanding of AI concepts and learn how to apply them effectively in real-world projects. I'm especially interested in exploring practical techniques for building intelligent systems. | Goal: Deepen AI understanding and learn practical techniques for intelligent systems | entrepreneur |
| ine Hands on completion of several GenAI projects - Python program and environment using OpenAI API (AND calling a web search engine AND scraping a data point from a Yahoo Finance point where the data value is NOT present in the page's HTML but is called by that HTML),  Fine-tuning a real model, actually incorporating a RAG, creating a GPT and knowing the optimum use case for it, creating a simple AI agent--that is fast; managing an AI project.  All the other items mentioned in your syllabus. But the essence is doing the actual programming and getting my own working model hands on with your assistance, not just reading about doing. | Goal: Hands-on experience with GenAI projects, programming, and managing AI models | leader |
| Delving deeper into practical AI with more hands on guided projects and bringing them to production level in support of a portfolio.  | Goal: Gain practical AI skills and build a portfolio with real projects | manager |
| Get hands on experience with AI projects and learn on a schedule. | Goal: Gain hands-on AI experience and learn on a schedule | ic |
| Practical AI foundation | Goal: Gain practical AI foundation | ic |
| Better understanding of utilizing AI tools beyond just using a chat bot.  | Goal: Improve understanding of AI tools beyond chatbots | ic |
| Basic knowledge on how to use AI  | Goal: Learn basic AI usage skills | leader |
| I'm currently in college and about to transition into my Bachelor's in Computer Science. Through this course, I would like to learn more about AI to create real-world applications. | Goal: Learn AI to create real-world applications for Computer Science | unknown |
| Get a better understanding of the mechanics of AI by applying concepts in practice.  | Goal: Understand AI mechanics through practical application | leader |
| I would like to learn AI basics and a panoramic picture of the alternatives to structure, develop and  implement AI projects.  I come from the Power Generation industry and would like to apply AI in my field, as well as for Applied AI consulting projects. | Goal: Learn AI basics and apply AI in Power Generation and consulting projects | entrepreneur |
| I am VP technology in professional service organization. Planning to get a good hold on AI practise so that I can adopt and implement it within our organization  | Goal: Learn AI to adopt and implement it in the organization | leader |
| AI Basic | Goal: Learn basic AI concepts | unknown |
| As a tech leader, I want to understand the possibilities of ML and AI in real-world projects and become able to help my tech teams with the right decision in terms of architecture, alghoritm, planning etc | Goal: Understand ML/AI for real-world projects and support tech teams' decisions | entrepreneur |
| Learn something new about AI | Goal: Learn new information about AI | unknown |
| Get more familiar with AI and have some knowledge and hands on experience to talk confidently about AI in interviews. End goal is to pivot to a career in AI.  | Goal: Gain AI knowledge and experience to pivot to an AI career. | manager |
| Practical applications for AI and an understanding of how to model AI systems at a technical level. | Goal: Learn practical AI applications and technical modeling skills | ic |
| experience with AI project and regularity of doing AI activities | Goal: Gain AI project experience and maintain regular AI activities | entrepreneur |
| I'm excited to learn from your real-world AI experience, as well as gain insights into the industry's structure and best practices. | Goal: Learn from real-world AI experience and industry insights | ic |
| Hands-on experience to build AI products | Goal: Gain hands-on experience to build AI products | entrepreneur |
| Get foundational knowledge on AI and related technologies. Create AI solutions for business use cases | Goal: Learn AI fundamentals and develop business AI solutions | leader |
| Gain a good grasp of the concepts and principles of artificial intelligence. | Goal: Understand AI concepts and principles | unknown |
| Get more hands on with AI | Goal: Gain practical experience with AI | manager |
| I would like to learn how ai works and how to use it properly for better results. | Goal: Learn how AI works and use it effectively for better results | ic |
| Intro to AI programming with Python | Goal: Learn AI programming with Python | ic |


In [13]:
# Ask the model to contrast cluster 4 with the other clusters, passing the
# rendered cluster tables as the accompanying input.
# NOTE(review): send_openai_request is a project helper; presumably
# `instructions` is sent as the system prompt and `user_input` as the user
# message - confirm in utils.
instructions = "What are the key differences between Cluster 4 and the others?"
response = send_openai_request(instructions, user_input)

In [14]:
# Render the model's reply as markdown in the notebook.
display(Markdown(response))

Here is a **synthesized analysis** of the key differences between **Cluster 4** and the other clusters, focusing on themes, level of specificity, and audience.

---

## Key Differences: Cluster 4 vs. Others

**1. Depth and Specificity of Goals**

- **Cluster 4:**  
  - Highly specific, technical, and **application-oriented** goals.
  - Participants mention concrete **technologies** (e.g., LLMs, RAG, tokenization, open-source models, XGBoost, hosting environments).
  - Desire to understand detailed processes like **model fine-tuning**, training with private datasets, and deployment (locally or cloud).
  - Several want to make informed choices about frameworks and tools.

- **Other Clusters:**
  - Generally **broader or more generic** goals.
  - Focus on wanting "hands-on experience," "AI basics," "career transition," or "understanding AI applications."
  - Some mention tool names (e.g., Cluster 1 citing LLMs, RAG, Hugging Face), but not in the context of an end-to-end application pipeline.
  - Many entries about "improving resumes," "gaining knowledge," "building practical skills," without specifying complex technical goals.

---

**2. Level of Technical Experience**

- **Cluster 4:**  
  - Appears to cater to **intermediate to advanced learners** who already have some background and now wish to:
    - Deep-dive into **methodologies**.
    - Make decisions (e.g., tool/model selection, environment comparison).
    - Understand all steps from data ingestion to deployment.
  - Less focus on "basics," more on **optimization and value generation**.

- **Other Clusters:**  
  - Encompasses a wide spectrum: from **newcomers and career changers** (Clusters 5, part of 1) to those wanting to "become AI enabled" or "start with basics."
  - Broader appeal—fits career upskilling, portfolio building, or general familiarity with AI.

---

**3. Profession/Background**

- **Cluster 4:**
  - Includes job categories like "ic" (individual contributors), "entrepreneur," "leader."
  - Several statements around transitioning into data science, building as a data scientist, or supporting organizational/data value goals.
  - Some have teaching backgrounds or are seeking to leverage existing expertise into AI consulting or specialist roles.

- **Other Clusters:**  
  - Broader job mix—students, product managers, software engineers, business leaders, those exploring AI for use in non-technical careers.
  - More focused on **career change** or **career growth.**

---

**4. Application Domains and Use Cases**

- **Cluster 4:**  
  - Emphasis on building **high-ROI, real-world AI applications**.
  - Multiple mentions of **app/tool creation**, **custom dataset training**, and **company environment comparison**.
  - Stronger focus on value realization from AI, not just learning for its own sake.

- **Other Clusters:**  
  - Application areas are mentioned (e.g., AI for small business, product management, regulatory compliance, etc.) but tend to be higher-level, exploratory, or aspirational.
  - Many express intent to **learn enough to get started** or know what is possible, rather than execute complex projects.

---

## **Summary Table**

| Aspect                 | Cluster 4                                         | Other Clusters                                     |
|------------------------|---------------------------------------------------|----------------------------------------------------|
| **Goal Specificity**   | Highly specific, often technical methodologies    | Generally broad or generic, skills-focused         |
| **Technical Depth**    | Intermediate/advanced, end-to-end understanding   | Mixed: basic to intermediate, foundational learning|
| **Career Focus**       | Upskilling, value creation, consulting/data science| Career change, early exploration, general upskilling|
| **Use Case Orientation**| Real-world, high-ROI, full system dev/deployment | Exploratory, hands-on basics, career enhancement   |
| **Examples**           | Fine-tuning, dataset tokenization, infra comparison| Hands-on tools, resume building, basic AI projects |

---

## **In Short**

**Cluster 4 stands out** due to its strong focus on mastering **all technical aspects** of building, deploying, and optimizing bespoke, high-value AI systems—especially for those already possessing some background who wish to become advanced practitioners or consultants.  
**Other clusters** are more likely to gather those seeking broad understanding, foundational skills, career pivots, or exploration of where AI can fit into their professional lives.

In [15]:
# Write the response to a markdown file. The encoding is pinned to UTF-8
# because the model's markdown can contain non-ASCII characters (e.g. em
# dashes) and the platform's default encoding is not guaranteed to handle them.
with open('data/4-icp/join_reason_cluster_analysis.md', 'w', encoding='utf-8') as f:
    f.write(response)

### 5-star reviews

In [16]:
# Keep only the rows where the `rating_gte_10` flag is set (the column is
# used directly as a boolean mask).
df_5star = df[df['rating_gte_10']]

# Columns selected for the 5-star customer export.
cols_to_keep = ['job_title', 'job_category', 'company_size_category', 'course_join_question', 'course_join_question_summarized']

In [17]:
# Export the selected columns of the 5-star subset through the project's
# df_to_markdown helper.
# NOTE(review): presumably writes a markdown table with the given title to
# the given path - confirm in utils.df_to_markdown.
df_to_markdown(df_5star[cols_to_keep], "data/4-icp/5-star-customers.md", title="5-Star Customers")