In [1]:
# Print the path of the `python` executable that the notebook's shell resolves to.
!which python

/home/tonatiu/Analysis/.venv/bin/python


In [39]:
#requirement.txt
!pip install pandas
!pip install openpyxl
!pip install streamlit
!pip install plotly
!pip install matplotlib



In [40]:
import pandas as pd
import plotly.graph_objects as go
import streamlit
import plotly.express as px
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio

# Database

In [41]:
# Load the transformed table and replace missing cells with empty strings
# in a single expression (no in-place mutation of the loaded frame).
df = pd.read_excel("./Tabela_Transformada_GE_Tales.xlsx").fillna("")

# Preview the first five rows.
df.head(5)

Unnamed: 0,Colaborador,Marco zero,A1.1,A1.2,A2.1,A2.2,B1.1,B1.2,B2.1,B2.2,B2+.1,B2+.2,C1.1,C1.2
0,Adriano Apolinario,09/2023,07/2024,,,12/2025,,,,,,,,
1,Adriano Carvalho,,,,04/2024,,,,06/2025,,,,,
2,Aguinaldo Jesus,,,,,09/2023,07/2024,,,12/2025,,,,
3,Alessandro Camargo Lopes,,,,,,03/2023,,07/2024,,06/2025,,,
4,Alex Sales,,,,,,,,04/2023,,07/2024,,06/2025,


# TeamMap

## Distribuição de pessoas por nível de proficiência

In [42]:
# Bar chart of the share of people at each CEFR proficiency level.

# Set the renderer to a compatible interactive one
pio.renderers.default = "iframe"

# Load data
file_path = 'Planilha_inicial.xlsx'
sheet_name = 'Consolidado'
data = pd.read_excel(file_path, sheet_name=sheet_name)

# Rename the column for easier access. `rename` silently ignores keys that are
# not present, so fail early with a clear message instead of a later
# KeyError on 'Proficiency'.
proficiency_source_column = 'Avaliação de proficiencia (classificação'
if proficiency_source_column in data.columns:
    data = data.rename(columns={proficiency_source_column: 'Proficiency'})
elif 'Proficiency' not in data.columns:
    raise KeyError(
        f"The column '{proficiency_source_column}' is not present in sheet '{sheet_name}'"
    )

# Replace 'Ground zero' with 'Marco zero'
data['Proficiency'] = data['Proficiency'].replace('Ground zero', 'Marco zero')

# Define the order of CEFR levels
cefr_levels = [
    "Marco zero", "A1.1", "A1.2", "A2.1", "A2.2",
    "B1.1", "B1.2", "B2.1", "B2.2", "B2+.1",
    "B2+.2", "C1.1", "C1.2", "C1.3", "C2"
]

# Count occurrences of each CEFR level. Values that are not in `cefr_levels`
# are dropped by the reindex and therefore excluded from the total below.
proficiency_counts = data['Proficiency'].value_counts().reindex(cefr_levels, fill_value=0)

# Calculate percentages; avoid a 0/0 division (NaN bars) when nothing matched.
total_count = proficiency_counts.sum()
if total_count:
    proficiency_percentages = (proficiency_counts / total_count * 100).round(2)
else:
    proficiency_percentages = proficiency_counts.astype(float)

# Prepare data for plotting
proficiency_data = pd.DataFrame({
    "Proficiency Level": cefr_levels,
    "Count": proficiency_counts.values,
    "Percentage": proficiency_percentages.values
})

# Qualitative palette for the CEFR levels. Set3 alone has 12 colours, fewer
# than the 15 levels, so it is extended with Pastel to keep every bar distinct
# (the first 12 colours are unchanged).
color_scale = px.colors.qualitative.Set3 + px.colors.qualitative.Pastel

# Create a bar plot with a discrete color scale
fig = px.bar(
    proficiency_data,
    x="Proficiency Level",
    y="Percentage",
    title="Distribution of Proficiency Levels (Percentage)",
    labels={"Percentage": "Percentage (%)", "Proficiency Level": "CEFR Levels"},
    text="Percentage",
    color="Proficiency Level",  # One colour per level
    color_discrete_sequence=color_scale
)

# Improve layout and aesthetics
fig.update_traces(textposition='outside')
fig.update_layout(
    # Keep the bars in CEFR progression order rather than trace order
    xaxis=dict(categoryorder='array', categoryarray=cefr_levels),
    template="plotly_white",
    title_font=dict(size=20, family='Arial, bold'),
    xaxis_title_font=dict(size=14, family='Arial'),
    yaxis_title_font=dict(size=14, family='Arial'),
)

# Show the plot
fig.show()


## Mapeamento individual

In [43]:
# Per-person timeline: one row per team member, one marker per CEFR level
# reached, labelled with the month/year it was recorded.
import pandas as pd
import plotly.graph_objects as go

# Load the Excel file
file_path = 'Tabela_Transformada_GE_Tales.xlsx'
english_data = pd.read_excel(file_path, sheet_name='Inglês')

# Check if 'Colaborador' column exists
if 'Colaborador' not in english_data.columns:
    raise KeyError("The column 'Colaborador' is not present in the DataFrame")

# Extract relevant columns
cef_columns = english_data.columns[2:]  # Assuming CEFR levels start from the third column
colaboradores = english_data[['Colaborador']]  # Extract "Colaborador" column
dates = english_data[cef_columns]

# Progression order of the levels = order of the spreadsheet columns.
# Sorting the labels as plain strings would put "Marco zero" last and
# "B2+.x" before "B2.x", which mis-assigns the first/last point colours.
level_order = list(cef_columns)
level_rank = {level: position for position, level in enumerate(level_order)}

# Melt the data to long format for Plotly compatibility
long_data = pd.melt(
    pd.concat([colaboradores, dates], axis=1),
    id_vars=['Colaborador'],
    var_name='CEFR Level',
    value_name='Date',
)

# Filter out rows with NaN dates
long_data = long_data.dropna(subset=['Date'])

# Convert dates to a consistent 'mm/yy' label. Values that do not parse as
# '%m/%Y' keep their original text instead of turning into a NaN label.
raw_dates = long_data['Date']
formatted_dates = pd.to_datetime(raw_dates, errors='coerce', format='%m/%Y').dt.strftime('%m/%y')
long_data['Date'] = formatted_dates.fillna(raw_dates.astype(str))

# Colours: earlier points/segments are purple, the most recent one is green
PAST_COLOR = '#391e70'
LATEST_COLOR = '#adc22f'

# Create figure
fig = go.Figure()

# Add traces for each individual (Colaborador)
for name in long_data['Colaborador'].unique():
    individual_data = (
        long_data[long_data['Colaborador'] == name]
        .sort_values(by='CEFR Level', key=lambda levels: levels.map(level_rank))
    )
    n_points = len(individual_data)

    if n_points == 1:
        # A single record has no segment to draw; still show its marker so the
        # person appears on the chart.
        # NOTE(review): purple is assumed for a lone point — confirm the intended colour.
        fig.add_trace(
            go.Scatter(
                x=individual_data['CEFR Level'],
                y=[name],
                mode='markers',
                marker=dict(size=10, color=PAST_COLOR),
                name=name,
                legendgroup=str(name),
                text=individual_data['Date'],  # Hover text with the date
                showlegend=True,
            )
        )
        continue

    # All markers purple except the last (green); all segments purple except
    # the last (green). For two points this yields [purple, green] / [green].
    marker_colors = [PAST_COLOR] * (n_points - 1) + [LATEST_COLOR]
    line_segments = [PAST_COLOR] * (n_points - 2) + [LATEST_COLOR]

    # One trace per segment so each segment can carry its own line colour
    for i in range(n_points - 1):
        fig.add_trace(
            go.Scatter(
                x=individual_data['CEFR Level'].iloc[i:i+2],
                y=[name, name],
                mode='markers+lines',
                marker=dict(size=10, color=marker_colors[i:i+2]),
                line=dict(width=2, color=line_segments[i]),
                name=name if i == 0 else None,  # Only show legend for the first segment
                legendgroup=str(name),  # Legend click toggles every segment of this person
                text=individual_data['Date'].iloc[i:i+2],  # Hover text with dates
                showlegend=(i == 0),
            )
        )

# Customize layout for scrollable chart
fig.update_layout(
    title='Team Map and Timeline of CEFR Level Progression',
    xaxis_title='CEFR Level',
    yaxis_title='Team Members',
    xaxis=dict(
        showgrid=True,
        fixedrange=True,  # Fix x-axis to prevent stretching
        # Pin the level order; otherwise categories follow trace insertion order
        categoryorder='array',
        categoryarray=level_order,
    ),
    yaxis=dict(
        showgrid=False,
        automargin=True,
        autorange='reversed',  # Ensure names are top-down
        type='category',
        # NOTE(review): with autorange='reversed' Plotly likely computes the
        # range itself and ignores this value — confirm.
        range=[-0.5, len(long_data['Colaborador'].unique()) - 0.5],
    ),
    template='plotly_white',  # Clean background
    height=800,
    width=1000,
    legend=dict(title="Team Members"),
    margin=dict(l=50, r=50, t=50, b=50),
)

# Add the date label at every marker. Set all annotations in one layout
# update instead of one add_annotation call per row.
fig.update_layout(
    annotations=[
        dict(
            x=level,
            y=member,
            text=label,
            showarrow=False,
            font=dict(size=10),
            align='center',
            xanchor='center',
        )
        for level, member, label in zip(
            long_data['CEFR Level'], long_data['Colaborador'], long_data['Date']
        )
    ]
)

# Display the chart
fig.show()
