In [121]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

# Descripción del Notebook: Limpieza y Preparación del Dataset de Stack Overflow 2023
En este notebook se documenta el proceso completo de limpieza y preparación del dataset de la encuesta Stack Overflow 2023. Se llevaron a cabo las siguientes tareas principales:
- Imputación de Valores Faltantes: Se abordaron las celdas con valores NaN utilizando técnicas adecuadas para garantizar la integridad de los datos.
- Codificación de Variables Categóricas: Se transformaron las variables categóricas en un formato compatible con algoritmos de aprendizaje automático.
- Dataset Preparado para Modelado: El conjunto de datos final se dejó completamente listo para su integración en un modelo predictivo.

#### Este notebook sirvió como base para el desarrollo del modelo definitivo implementado en el pipeline del proyecto. No forma parte del código funcional del pipeline, sino que se incluye como una guía detallada del proceso paso a paso, documentando las decisiones tomadas y asegurando la reproducibilidad del trabajo.

### 1. Configuración inicial

In [214]:
# Establecer la opción para mostrar todas las columnas
pd.set_option('display.max_columns', None)

In [215]:
# Cargar el DataFrame desde el archivo
with open('../Pickles/data_2023.pickle', 'rb') as archivo:
    df = pickle.load(archivo)

In [243]:
df['CompTotal'].value_counts().head(10)

CompTotal
60000.0    60
50000.0    56
40000.0    50
45000.0    46
35000.0    43
30000.0    43
70000.0    38
80000.0    30
55000.0    28
24000.0    23
Name: count, dtype: int64

Elimino columnas que no se utilizarán en el análisis
- ResponseId (index)
- Check (pregunta en la encuesta para verificar que el encuestado presta atencion a lo que responde)
- ConvertedCompYearly (conversion a dolares americanos de los que gana, lo elimino porque tratare solo EUR)

In [124]:
df = df.drop(columns=['Q120', 'ConvertedCompYearly', 'Country', 'Currency'])

### 2. Codificación de MainBranch
Empiezo a trabajar columna por columna con los ecoders. 
0. MainBranch: Que tipo de programador eres:
- I am a developer by profession
- I am not primarily a developer, but I write code sometimes as part of my work/studies 
- I used to be a developer by profession, but no longer am 
- I am learning to code 
- I code primarily as a hobby 
- None of these

Decido hacer un Label encoder, con valores del 0 (None of these) al 5 (Developer by profession), ya que quiero que se le de eventualmente mas peso a estos perfiles

In [125]:
df['MainBranch'].unique()

array(['I am a developer by profession',
       'I am not primarily a developer, but I write code sometimes as part of my work/studies'],
      dtype=object)

In [126]:
labels = {#'None of these': 0, 
        #   'I code primarily as a hobby': 1, 
        #   'I am learning to code':2,              --------------> todos estos son valores que aparecen en la encuesta, pero no en las respuestas
        #   'I used to be a developer by profession, but no longer am':3, 
          'I am not primarily a developer, but I write code sometimes as part of my work/studies': 4,
          'I am a developer by profession':5}
df['MainBranch'] = df['MainBranch'].map(labels).fillna(-1)

In [127]:
df['MainBranch'].isna().sum()

0

1. Age:
- Under 18 years old 
- 18-24 years old 
- 25-34 years old 
- 35-44 years old 
- 45-54 years old 
- 55-64 years old 
- 65 years or older 
- Prefer not to say

Decido primero hacer un mapping y reducir las categorias, y luego hacer un OneHotEncoder: aunque se agregarian mas columnas, no son muchas.

Antes de hacer el OneHotEncoder verifico que no hayan nans. Si los hay, los trato primero, y luego hago OneHot

In [128]:
print(df['Age'].unique())

['35-44 years old' '18-24 years old' '25-34 years old' '45-54 years old'
 '55-64 years old' '65 years or older' 'Under 18 years old']


In [129]:
df['Age'].value_counts()

Age
25-34 years old       466
35-44 years old       420
45-54 years old       142
18-24 years old        90
55-64 years old        27
65 years or older       4
Under 18 years old      1
Name: count, dtype: int64

In [130]:
print(df['Age'].isna().sum())
print(df['Age'].dtype)
df['Age'] = df['Age'].fillna('Prefer not to say')  # Rellenar NaN con una categoría válida
df['Age'] = df['Age'].str.strip()   # por las dudas, elimino espacios vacios que me puedan causar problemas con el One Hot

0
object


In [131]:
age_mapping = {
    'Under 18 years old': 'Joven',
    '18-24 years old': 'Joven',
    '25-34 years old': 'Adulto',
    '35-44 years old': 'Adulto',
    '45-54 years old': 'Adulto',
    '55-64 years old': 'Senior',
    '65 years or older': 'Senior',
    'Prefer not to say': 'Desconocido'
}
df['Age_Grouped'] = df['Age'].map(age_mapping)

In [132]:
# Aplicar one-hot encoding a 'Age_Grouped'
most_frequent_age = df['Age'].mode()[0]
df['Age'].fillna(most_frequent_age, inplace=True)
encoder = OneHotEncoder(sparse_output=False)
encoded_age = encoder.fit_transform(df[['Age_Grouped']])
encoded_age_df = pd.DataFrame(encoded_age, columns=encoder.get_feature_names_out(['Age_Grouped']), index=df.index)
df = pd.concat([df, encoded_age_df], axis=1)

# Eliminar la columna original 'Age' y la columna 'Age_Grouped'
df.drop(['Age', 'Age_Grouped'], axis=1, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(most_frequent_age, inplace=True)


Compruebo que todo esta correcto, se han agregado las columnas correspondientes, y las filas siguen igual. Sigo. 

2. Employment:
- Employed, full-time 
- Employed, part-time 
- Independent contractor, freelancer, or self-employed
- Not employed, but looking for work 
- Not employed, and not looking for work 
- Student, full-time 
- Student, part-time 
- Retired 
- I prefer not to say



Esta variable describe la situación laboral de los encuestados. Se identificó como una variable potencialmente importante para la predicción del salario. Sin embargo, la codificación original de esta variable, que combinaba múltiples categorías en una sola cadena de texto, presentaba ciertos desafíos para su uso en modelos de machine learning.

*Estrategia Inicial y sus Limitaciones*

Inicialmente, se implementó una estrategia de mapeo que asignaba valores numéricos a cada categoría presente en la columna employment. Se asignó un valor de 1 a "Employed, full-time", 0.5 a "Employed, part-time" y "Student, part-time", 1 a "Independent contractor, freelancer, or self-employed", -1 a "I prefer not to say", y 0 a "Retired", "Not employed, and not looking for work", "Not employed, but looking for work" y "Student, full-time".  Además, en los casos donde se combinaban múltiples categorías, se sumaban los valores correspondientes.

Sin embargo, este enfoque presentaba algunas limitaciones:
- Complejidad: La variable resultante tenía una relación compleja con el salario, lo que dificultaba el aprendizaje del modelo.
- Pérdida de Información: No se capturaba de forma clara la distinción entre los diferentes tipos de empleo.
- Baja Importancia: Los modelos entrenados con esta codificación asignaban una baja importancia a la variable employment_map, lo que sugería que la información no se estaba aprovechando de forma óptima.

*Estrategia Mejorada: Variables Binarias y num_jobs*

Para abordar las limitaciones de la estrategia inicial, se implementó un enfoque alternativo basado en la creación de variables binarias y una variable numérica adicional. Se crearon las siguientes variables:

- is_full_time: Indica si la persona está empleada a tiempo completo (1) o no (0).
- is_part_time: Indica si la persona está empleada a tiempo parcial (1) o no (0).
- is_independent: Indica si la persona es freelancer o trabaja por cuenta propia (1) o no (0).
- is_retired: Indica si la persona esta retirada (1) o no (0).
- is_other_employment: Indica si la persona no se puede clasificar en ninguna de las otras categorias (1) o no (0).
- num_jobs: Indica el número de empleos o actividades que realiza la persona, inferido a partir del número de categorías presentes en la columna employment original.

In [133]:
df['Employment'].unique()

array(['Employed, full-time',
       'Employed, full-time;Independent contractor, freelancer, or self-employed',
       'Independent contractor, freelancer, or self-employed',
       'Employed, full-time;Independent contractor, freelancer, or self-employed;Employed, part-time',
       'Employed, part-time',
       'Independent contractor, freelancer, or self-employed;Employed, part-time',
       'I prefer not to say', 'Employed, full-time;Employed, part-time',
       'Retired'], dtype=object)

In [134]:
df['Employment'].value_counts()

Employment
Employed, full-time                                                                             974
Independent contractor, freelancer, or self-employed                                             99
Employed, full-time;Independent contractor, freelancer, or self-employed                         52
Independent contractor, freelancer, or self-employed;Employed, part-time                         11
Employed, part-time                                                                              10
Employed, full-time;Independent contractor, freelancer, or self-employed;Employed, part-time      1
I prefer not to say                                                                               1
Employed, full-time;Employed, part-time                                                           1
Retired                                                                                           1
Name: count, dtype: int64

In [135]:
# Reemplazo 'Retired' para no crear una nueva columna con eso, al ser 1 solo valor, no tendra importancia. 
df['Employment'] = df['Employment'].replace('Retired', 'I prefer not to say')

In [136]:
# Crear las columnas binarias
df['is_full_time'] = df['Employment'].str.contains('Employed, full-time').fillna(False).astype(int)
df['is_part_time'] = df['Employment'].str.contains('Employed, part-time').fillna(False).astype(int)
df['is_independent'] = df['Employment'].str.contains('Independent contractor, freelancer, or self-employed').fillna(False).astype(int)

# Crear la columna 'num_jobs'
df['num_jobs'] = df['Employment'].str.split(';').str.len().fillna(0).astype(int)

# Tratar los valores especiales (I prefer not to say, etc.)
df['is_other_employment'] = ((df['is_full_time'] == 0) & (df['is_part_time'] == 0) &
                            (df['is_independent'] == 0)).astype(int)
# Eliminar la columna original 'Employment'
df.drop('Employment', axis=1, inplace=True)

3. RemoteWork:
- Remote 
- In-person 
- Hybrid (some remote, some in-person)

Se ha decidido agrupar las categorías 'Remote' y 'Hybrid' de la variable RemoteWork en una nueva categoría denominada 'Remote/Hybrid'. Esta decisión se ha tomado en base a las siguientes consideraciones:

Similitud Conceptual: Tanto 'Remote' como 'Hybrid' implican la posibilidad de trabajar fuera de la oficina, lo que las diferencia sustancialmente de la categoría 'In-person'.
Equilibrio de Grupos: La agrupación da lugar a dos grupos de tamaño más equilibrado ('Remote/Hybrid': 1040 muestras, 'In-person': 108 muestras), lo que puede contribuir a la estabilidad y el rendimiento del modelo.
Simplificación del Modelo: La reducción del número de categorías simplifica el modelo y facilita la interpretación de los resultados.
El análisis de la importancia de las variables tras la agrupación, junto con la evaluación del rendimiento del modelo mediante validación cruzada, permitirá determinar si esta estrategia de agrupamiento es beneficiosa para la predicción de los salarios.

In [137]:
df['RemoteWork']=df['RemoteWork'].fillna('Remote')
df['RemoteWork'].isna().sum()

0

In [138]:
df['RemoteWork'].value_counts()

RemoteWork
Remote                                  655
Hybrid (some remote, some in-person)    387
In-person                               108
Name: count, dtype: int64

In [139]:
df['RemoteWork'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1150 entries, 0 to 1149
Series name: RemoteWork
Non-Null Count  Dtype 
--------------  ----- 
1150 non-null   object
dtypes: object(1)
memory usage: 9.1+ KB


In [140]:
df['Remote_grouped'] = df['RemoteWork'].map({'Remote': 'Remote/Hybrid', 'Hybrid (some remote, some in-person)': 'Remote/Hybrid', 'In-person': 'In-person'})

# Aplicar one-hot encoding a 'Remote_grouped'
encoder = OneHotEncoder(sparse_output=False)
encoded_remote = encoder.fit_transform(df[['Remote_grouped']])
encoded_remote_df = pd.DataFrame(encoded_remote, columns=encoder.get_feature_names_out(['Remote_grouped']), index=df.index)
df = pd.concat([df, encoded_remote_df], axis=1)

# Eliminar las columnas originales
df.drop(['RemoteWork', 'Remote_grouped'], axis=1, inplace=True)

4. CodingActivities (fuera del trabajo):
- Hobby (2)
- Freelance/contract work (3)
- Contribute to open-source projects (4) 
- Bootstrapping a business (5)
- School or academic work (1)
- Professional development or self-paced learning from online courses (6)
- I don’t code outside of work (0)
- Other (-1)

Decido un Label encoder y darle mas peso a unas categorias, mas que a otras (indicado el peso de cada una)

Hay Muchos Nan, los trabajo imputandolos a la categoria de "Other"

In [141]:
df['CodingActivities'].isna().sum()

2

In [142]:
def process_multiple_categories(df, category_column, target_column, separator=','):
    """
    Realiza target encoding para columnas con múltiples valores separados por un delimitador.
    """
    # Llenar valores NaN en la columna categórica con 'Unknown'
    df[category_column] = df[category_column].fillna('Unknown')
    
    # Expandir los valores separados por comas en listas
    df[category_column] = df[category_column].apply(lambda x: x.split(separator) if isinstance(x, str) else [x])
    
    # Crear un DataFrame temporal para aplanar las listas
    exploded_df = df.explode(category_column)
    
    # Calcular el promedio del target por categoría
    target_map = exploded_df.groupby(category_column)[target_column].mean().to_dict()
    
    # Función para calcular el promedio de los valores codificados para una fila
    def calculate_row_encoding(categories):
        encoded_values = [target_map.get(cat, 0) for cat in categories]  # 0 para categorías desconocidas
        return sum(encoded_values) / len(encoded_values) if encoded_values else 0
    
    # Crear la nueva columna con el promedio del encoding
    df[f'{category_column}_encoded'] = df[category_column].apply(calculate_row_encoding)
    
    return df, target_map


# Aplicar el procesamiento a la columna
df, target_map = process_multiple_categories(df, 'CodingActivities', 'CompTotal')

# Eliminamos las columnas temporales si ya no son necesarias
df = df.drop(['CodingActivities'], axis=1, errors='ignore')

# Revisamos los valores codificados
print("\nValores únicos después del encoding:")
print(df[f'CodingActivities_encoded'].value_counts(dropna=False))


Valores únicos después del encoding:
CodingActivities_encoded
50361.000000     270
58952.191011     178
50908.278146     151
67843.790698      86
45857.142857      56
67207.588235      51
52341.025641      39
85717.555556      27
60130.434783      23
47550.000000      20
33220.000000      20
51105.882353      17
62211.764706      17
61656.250000      16
35130.133333      15
68000.000000      13
65458.333333      12
35116.666667      12
73900.000000      10
59111.111111       9
51000.000000       7
65357.142857       7
66166.666667       6
48333.333333       6
43666.666667       6
123500.000000      6
78666.666667       6
81400.000000       5
57600.000000       5
70800.000000       5
46875.000000       4
90000.000000       3
36333.333333       3
37000.000000       3
52666.666667       3
155666.666667      3
57500.000000       2
60000.000000       2
79000.000000       2
42500.000000       2
45250.000000       2
40500.000000       2
45000.000000       2
47500.000000       2
42680.000000 

5. EdLevel: 
- Primary/elementary school (0)
- Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.) (1)
- Some college/university study without earning a degree (2)
- Associate degree (A.A., A.S., etc.) (3)
- Bachelor’s degree (B.A., B.S., B.Eng., etc.) (4) 
- Master’s degree (M.A., M.S., M.Eng., MBA, etc.) (5) 
- Professional degree (JD, MD, Ph.D, Ed.D, etc.) (6)
- Something else (-1)

Hago un LabelEncoder con los valores destacados

In [143]:
df['EdLevel'].unique()

array(['Master’s degree (M.A., M.S., M.Eng., MBA, etc.)',
       'Associate degree (A.A., A.S., etc.)',
       'Bachelor’s degree (B.A., B.S., B.Eng., etc.)',
       'Professional degree (JD, MD, Ph.D, Ed.D, etc.)',
       'Some college/university study without earning a degree',
       'Something else',
       'Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)',
       'Primary/elementary school'], dtype=object)

In [144]:
df['EdLevel'].value_counts()

EdLevel
Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                          386
Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                       337
Some college/university study without earning a degree                                164
Professional degree (JD, MD, Ph.D, Ed.D, etc.)                                        124
Associate degree (A.A., A.S., etc.)                                                    65
Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)     39
Something else                                                                         26
Primary/elementary school                                                               9
Name: count, dtype: int64

In [145]:
df['EdLevel'].isna().sum()

0

In [146]:
labels5 = {
    'Master’s degree (M.A., M.S., M.Eng., MBA, etc.)': 5,
    'Bachelor’s degree (B.A., B.S., B.Eng., etc.)': 4,
    'Professional degree (JD, MD, Ph.D, Ed.D, etc.)': 6,
    'Some college/university study without earning a degree': 2, 
    'Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)': 1,
    'Associate degree (A.A., A.S., etc.)': 3, 
    'Something else': -1,
    'Primary/elementary school': 0
}
df['EdLevel'] = df['EdLevel'].map(labels5).fillna(-1)

6. LearnCode: 
- Books / Physical media (3): Implica un esfuerzo autodirigido, pero es formal y metódico.
- Coding Bootcamp (3): También estructurado, pero más intensivo y con un enfoque en habilidades prácticas.
- Colleague (2): Más relacionado con la práctica laboral, pero también limitado al entorno de trabajo.
- Friend or family member (1): Aprendizaje menos formal y probablemente limitado en alcance.
- Hackathons (virtual or in-person)
- Online Courses or Certification (2): Similar al entrenamiento en el trabajo, pero autodirigido y menos formal.
- On the job training (2): Aprender en el trabajo es práctico, pero menos estructurado.
- Other online resources (e.g., videos, blogs, forum, online community) (2): Aunque útil, suele ser más informal y variado en calidad.
- School (i.e., University, College, etc) (4): Representa una forma tradicional de aprendizaje, estructurada y generalmente reconocida como valiosa.
- Other (please specify):(0)

Le diversa importancia a las categorias con total discrecion (segun el tipo de educacion). Un OneHot me crearia demasiadas columnas. 

Prefiero hacer una funcion que, luego de dividir en el ";", se reemplace cada categoria por el numero correspondiente, y luego sumo esos numeros. De esta forma, una persona que se ha educado de 5 fuentes diversas, tendra mas puntaje que una que solo se ha educado con 1. Da cuenta del compromiso y profesionalismo que tiene esa persona. 

In [147]:
df['LearnCode'].isna().sum()   # hay un solo valor, lo dejamos a 0 (primera linea de la funcion)

6

Creo en un dict los valores

In [148]:
# Aplicar el procesamiento a la columna 'LearnCodeOnline_Category'
df, target_map = process_multiple_categories(df, 'LearnCode', 'CompTotal')

# Eliminamos las columnas temporales si ya no son necesarias
df = df.drop(['LearnCode'], axis=1, errors='ignore')

# Revisamos los valores codificados
print("\nValores únicos después del encoding:")
print(df[f'LearnCode_encoded'].value_counts(dropna=False))


Valores únicos después del encoding:
LearnCode_encoded
55999.227857    72
57575.128454    55
52372.931970    51
54870.263257    46
54453.590602    45
                ..
55555.208510     1
51113.066259     1
59542.820294     1
69274.989241     1
61365.869708     1
Name: count, Length: 195, dtype: int64


7. LearnCodeOnline
    - Stack Overflow ('Community_Collaboration')
    - Blogs ('Community_Collaboration')
    - Online books ('Visual_Interactive')
    - Video-based online courses ('Visual_Interactive')
    - Online challenges (e.g., daily or weekly coding challenges) ('Community_Collaboration')
    - Coding sessions (live or recorded) ('Community_Collaboration')
    - Written-based online courses ('Technical_Reference')
    - How-to videos ('Visual_Interactive')
    - Auditory material (e.g., podcasts) ('Visual_Interactive')
    - Technical documentation ('Technical_Reference')
    - Certification videos ('Visual_Interactive')
    - Games that teach programming ('Visual_Interactive')
    - Written tutorials ('Technical_Reference')
    - Interactive tutorial ('Visual_Interactive')
    - Online forum ('Community_Collaboration')
    - Other ('Visual_Interactive')

Estas herramientas no tienen mas importancia unas que otras, ni son ordinales. Elijo usar OneHotEncoder. Como son muchas columnas, decido agruparlas, para que no sean tantas.
Dejo 3 categorias: 'Visual_Interactive', 'Technical_Reference', 'Community_Collaboration'. 

Creo una funcion para ver los valores unicos, ya que en las celdas hay muchos valores. 

Mapeo las categorias para que no sean tantas al hacer OneHot

In [149]:
# Aplicar el procesamiento a la columna 'LearnCodeOnline_Category'
df, target_map = process_multiple_categories(df, 'LearnCodeOnline', 'CompTotal')

# Eliminamos las columnas temporales si ya no son necesarias
df = df.drop(['LearnCodeOnline'], axis=1, errors='ignore')

# Revisamos los valores codificados
print("\nValores únicos después del encoding:")
print(df[f'LearnCodeOnline_encoded'].value_counts(dropna=False))


Valores únicos después del encoding:
LearnCodeOnline_encoded
53507.215094     265
40000.000000      20
93203.125000      16
60000.000000      14
52664.285714      14
                ... 
135000.000000      1
42645.833333       1
30500.000000       1
54125.000000       1
38800.000000       1
Name: count, Length: 353, dtype: int64


In [150]:
df['LearnCodeOnline_encoded'].value_counts()

LearnCodeOnline_encoded
53507.215094     265
40000.000000      20
93203.125000      16
60000.000000      14
52664.285714      14
                ... 
135000.000000      1
42645.833333       1
30500.000000       1
54125.000000       1
38800.000000       1
Name: count, Length: 353, dtype: int64

8. 'LearnCodeCoursesCert': la elimino, no tiene mucha información para aportar.

In [151]:
df = df.drop(['LearnCodeCoursesCert'], axis=1)

9. YearsCode: sacar los nan y pasarlo a int

In [152]:
df['YearsCode'].isnull().sum()
moda = df['YearsCode'].mode()[0]
df['YearsCode'] = df['YearsCode'].fillna(moda)

In [153]:
df['YearsCode'] = df['YearsCode'].replace('Less than 1 year', 0)

In [154]:
df['YearsCode'] = df['YearsCode'].replace('More than 50 years', 50)

In [155]:
df['YearsCode']=df['YearsCode'].astype(int)

In [156]:
df['YearsCode'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1150 entries, 0 to 1149
Series name: YearsCode
Non-Null Count  Dtype
--------------  -----
1150 non-null   int32
dtypes: int32(1)
memory usage: 4.6 KB


10. YearsCodePro: reemplazo nan y paso a int

In [157]:
df['YearsCodePro'].isnull().sum()

8

In [158]:
moda = df['YearsCodePro'].mode()[0]
df['YearsCodePro'] = df['YearsCodePro'].fillna(moda)

In [159]:
df['YearsCodePro'].unique()

array(['10', '15', '3', '2', '13', '8', '11', '23', '30', '12', '6', '5',
       '7', '18', '20', '1', '9', '22', '28', '4', 'Less than 1 year',
       '25', '19', '17', '21', '14', '26', '27', '16', '24', '29', '35',
       '38', '34', '45', '37', '40', '31', '36', '41', '39', '33', '32',
       '49'], dtype=object)

In [160]:
df['YearsCodePro']=df['YearsCodePro'].replace('Less than 1 year', 0)

In [161]:
df['YearsCodePro']=df['YearsCodePro'].astype(int)
df['YearsCodePro'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1150 entries, 0 to 1149
Series name: YearsCodePro
Non-Null Count  Dtype
--------------  -----
1150 non-null   int32
dtypes: int32(1)
memory usage: 4.6 KB


11. DevType
- Academic researcher (5)
- Blockchain (6)
- Cloud infrastructure engineer (3)
- Data or business analyst (2)
- Data engineer (2)
- Data scientist or machine learning specialist (2)
- Database administrator (7)
- Designer (7)
- Developer Advocate (1)
- Developer, AI (1)
- Developer, back-end (1)
- Developer, desktop or enterprise applications (1)
- Developer, embedded applications or devices (1)
- Developer Experience (1)
- Developer, front-end (1)
- Developer, full-stack(1)
- Developer, game or graphics (1)
- Developer, mobile(1) 
- Developer, QA or test (1)
- DevOps specialist (3)
- Educator (5)
- Engineer, site reliability (3)
- Engineering manager (4)
- Hardware Engineer (6)
- Marketing or sales professional (7)
- Product manager (4)
- Project manager (4)
- Research & Development role (5)
- Scientist (5)
- Senior Executive (C-Suite, VP, etc.) (4)
- Student (7)
- System administrator (3)
- Security professional (3)
- Other (7)

Elijo agruparlos para que no sean tantos, y asi simplificar el analisis, y luego un OneHotEncoder:
1. Gestión y Liderazgo
2. Desarrollo de Software
3. Infraestructura y Sistemas
4. Ingeniería de Datos y Ciencia de Datos
5. Investigación y Educación
6. Hardware y Blockchain
7. Roles de Soporte y Otros

In [162]:
# Aplicar el procesamiento a la columna 'DevType_encoded'
df, target_map = process_multiple_categories(df, 'DevType', 'CompTotal')

# Eliminamos las columnas temporales si ya no son necesarias
df = df.drop(['DevType'], axis=1, errors='ignore')

# Revisamos los valores codificados
print("\nValores únicos después del encoding:")
print(df[f'DevType_encoded'].value_counts(dropna=False))


Valores únicos después del encoding:
DevType_encoded
53536.351028     313
55708.823657     283
52893.454509     113
56784.315639      52
48495.371285      47
50761.363636      33
69206.125227      29
83401.571429      28
32359.692308      26
85898.000000      25
59279.131692      23
56313.333333      21
73800.000000      20
58578.947368      19
35516.875000      16
51076.923077      13
40700.000000      10
109444.444444      9
82102.485380       9
55702.114784       9
81125.000000       8
46240.249705       7
109333.333333      6
42400.000000       5
43720.000000       5
45150.000000       4
49500.000000       4
49750.000000       4
30666.666667       3
29000.000000       3
96000.000000       2
30000.000000       1
Name: count, dtype: int64


12. OrgSize: Cuántas personas forman parte de la organización para la que trabajas (organización primaria)
- I don’t know
- Just me - I am a freelancer, sole proprietor, etc. 
- 2 to 9 employees 
- 10 to 19 employees 
- 20 to 99 employees 
- 100 to 499 employees 
- 500 to 999 employees 
- 1000 to 4999 employees 
- 5000 to 9999 employees 
- 10000 or more employees 

Label Encoder, en ese orden

In [163]:
df['OrgSize'].unique()

array(['2 to 9 employees', '10,000 or more employees',
       '500 to 999 employees', '1,000 to 4,999 employees',
       '20 to 99 employees', '100 to 499 employees', '10 to 19 employees',
       'Just me - I am a freelancer, sole proprietor, etc.',
       'I don’t know', '5,000 to 9,999 employees', nan], dtype=object)

In [164]:
df['OrgSize'].value_counts()

OrgSize
20 to 99 employees                                    253
100 to 499 employees                                  217
1,000 to 4,999 employees                              190
2 to 9 employees                                      111
10,000 or more employees                              105
500 to 999 employees                                   89
10 to 19 employees                                     78
Just me - I am a freelancer, sole proprietor, etc.     55
5,000 to 9,999 employees                               34
I don’t know                                           17
Name: count, dtype: int64

Los valores NaN siguen apareciendo, a pesar de hacer utilizado el fillna mas arriba. Por eso lo pongo tambien al hacer el map. 

In [165]:
# Agrupación de categorías
orgsize_mapping = {
    'I don’t know': 'Desconocido',
    'Just me - I am a freelancer, sole proprietor, etc.': 'Muy Pequeña',
    '2 to 9 employees': 'Muy Pequeña',
    '10 to 19 employees': 'Muy Pequeña',
    '20 to 99 employees': 'Pequeña',
    '100 to 499 employees': 'Pequeña',
    '500 to 999 employees': 'Mediana',
    '1,000 to 4,999 employees': 'Mediana',
    '5,000 to 9,999 employees': 'Grande',
    '10,000 or more employees': 'Grande'
}
df['OrgSize_Grouped'] = df['OrgSize'].map(orgsize_mapping).fillna('Desconocido')

# One-hot encoding de la variable agrupada
encoder = OneHotEncoder( sparse_output=False)
encoded_orgsize = encoder.fit_transform(df[['OrgSize_Grouped']])
encoded_orgsize_df = pd.DataFrame(encoded_orgsize, columns=encoder.get_feature_names_out(['OrgSize_Grouped']), index=df.index)
df = pd.concat([df, encoded_orgsize_df], axis=1)

# Eliminar las columnas originales
df.drop(['OrgSize', 'OrgSize_Grouped'], axis=1, inplace=True)

13. PurchaseInfluence: Nivel de influencia sobre nuevas tecnologías adquiridas por la organización
- I have little or no influence 
- I have some influence 
- I have a great deal of influence

Label Encoder en ese orden

In [166]:
# Primero trato los Nan
df['PurchaseInfluence'] = df['PurchaseInfluence'].fillna('Other')
print(df['PurchaseInfluence'].isna().sum())

0


In [167]:
df['PurchaseInfluence'].unique()

array(['I have little or no influence', 'I have some influence',
       'I have a great deal of influence', 'Other'], dtype=object)

In [168]:
labels13 = {
    'I have little or no influence': 0,
    'I have some influence': 1,
    'I have a great deal of influence': 2,
    'Other': -1
}
df['PurchaseInfluence'] = df['PurchaseInfluence'].map(labels13).fillna(-1)

14. TechList: Irrelevante, lo elimino

In [169]:
df = df.drop(['TechList'], axis=1)

15. BuyNewTool: Cómo descubrir o investigas soluciones disponibles en nuevas herramientas o softwares
- Start a free trial 
- Ask developers I know/work with 
- Ask a generative AI tool 
- Visit developer communities like Stack Overflow 
- Read ratings or reviews on third party sites like G2 Crowd 
- Research companies that have advertised on sites I visit 
- Research companies that have emailed me
- Other 

No tienen orden, separo por ; y hago OneHotEncoder al final

In [170]:
df['BuyNewTool'].unique()

array(['Start a free trial;Ask developers I know/work with',
       'Ask developers I know/work with;Visit developer communities like Stack Overflow',
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow',
       nan,
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Ask a generative AI tool',
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd',
       'Ask developers I know/work with',
       'Ask developers I know/work with;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd',
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd;Ask a generative AI tool',
       'Visit developer communities like S

In [171]:
df['BuyNewTool'].isna().sum()
df['BuyNewTool'] = df['BuyNewTool'].fillna('Other')

#### Ahora vienen una seguidilla de columnas que tengo que separar por el ';' y luego hacer un OneHotEncoder. 

Creo una funcion para aplicar lo mismo a todas juntas. El codigo de estas columnas se vera al final.

Luego elimino las que indico en el listado, por no aportar informacion nueva ni relevante

16. LanguageHaveWorkedWith: Dividirlo por ';' y luego OneHotEncoder. Se haran muchas columnas, pero no queda otra. 

17. LanguageWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

18. DatabaseHaveWorkedWith: Igual

19. DatabaseWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

20. PlatformHaveWorkedWith: Igual

21. PlatformWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

22. WebframeHaveWorkedWith: Igual

23. WebframeWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

24. MiscTechHaveWorkedWith: Igual 

25. MiscTechWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas 

26. ToolsTechHaveWorkedWith: Igual 

27. ToolsTechWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

28. NEWCollabToolsHaveWorkedWith: Igual 

29. NEWCollabToolsWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

30. OpSysPersonal use: La elimino, me quedo solo con el uso de sistema operativo profesional. 

31. OpSysProfessional use: Agrupo los sistemas operativos en los mas representativos para no hacer tantas columnas: 
- AIX (4) Otros
- Android (3) Android-based
- Arch (1)
- BSD (1)
- ChromeOS (3)
- Cygwin (4)
- Debian (1) Linux-based
- Fedora (1)
- Haiku (1)
- iOS (2) MacOs
- iPadOS (2)
- MacOS (2)
- Other Linux-based (1)
- Red Hat (1)
- Solaris (1)
- Ubuntu (1)
- Windows (0) Windows-based
- Windows Subsystem for Linux (WSL) (0)
- Other (4)

Una vez agrupadas, le hago un MultilabelEncoder para poder poner varias etiquetas por fila

In [172]:
df['OpSysProfessional use'].unique()

array(['Debian;Other Linux-based', 'MacOS',
       'Windows;Windows Subsystem for Linux (WSL)', nan, 'Fedora',
       'Ubuntu', 'Windows', 'Fedora;Windows', 'MacOS;Windows',
       'MacOS;Ubuntu', 'Ubuntu;Windows', 'Arch;Debian;Ubuntu',
       'iOS;iPadOS;MacOS', 'Android;iOS;MacOS', 'Android;MacOS', 'iOS',
       'Debian;Other Linux-based;Ubuntu', 'iOS;MacOS',
       'Android;iOS;iPadOS;MacOS;Ubuntu',
       'Arch;Debian;Ubuntu;Windows;Windows Subsystem for Linux (WSL)',
       'Arch;Windows;Windows Subsystem for Linux (WSL)', 'iPadOS;MacOS',
       'Ubuntu;Windows;Windows Subsystem for Linux (WSL)',
       'Android;Debian;Other Linux-based;Ubuntu;Windows', 'Arch',
       'Debian;Other Linux-based;Ubuntu;Windows;Windows Subsystem for Linux (WSL)',
       'Other (Please Specify):', 'Android;Windows',
       'Debian;Ubuntu;Windows;Windows Subsystem for Linux (WSL)',
       'Other Linux-based;Red Hat',
       'Cygwin;Windows;Windows Subsystem for Linux (WSL)',
       'MacOS;Ubuntu;Window

In [173]:
print(df['OpSysProfessional use'].value_counts())

OpSysProfessional use
MacOS                                                                                    234
Windows                                                                                  192
Ubuntu                                                                                   101
Windows;Windows Subsystem for Linux (WSL)                                                 52
Ubuntu;Windows                                                                            39
                                                                                        ... 
Android;Debian;MacOS;Red Hat;Ubuntu;Windows;Windows Subsystem for Linux (WSL)              1
Android;Debian;Ubuntu;Windows                                                              1
Arch;Cygwin;Debian;Other Linux-based;Ubuntu;Windows;Windows Subsystem for Linux (WSL)      1
Other Linux-based;Ubuntu;Windows;Windows Subsystem for Linux (WSL)                         1
Android;MacOS;Other Linux-based;Red Hat;Windows 

In [174]:
# Primero trato los Nan
df['OpSysProfessional use'] = df['OpSysProfessional use'].fillna('Other')
df['OpSysProfessional use'].isnull().sum()

0

In [175]:
# Diccionario de mapeo
os_map = {
    'AIX': 'Otros',
    'Android': 'Android-based',
    'Arch': 'Linux-based',
    'BSD': 'Linux-based',
    'ChromeOS': 'Android-based',
    'Cygwin': 'Otros',
    'Debian': 'Linux-based',
    'Fedora': 'Linux-based',
    'Haiku': 'Linux-based',
    'iOS': 'MacOS',
    'iPadOS': 'MacOS',
    'MacOS': 'MacOS',
    'Other Linux-based': 'Linux-based',
    'Red Hat': 'Linux-based',
    'Solaris': 'Linux-based',
    'Ubuntu': 'Linux-based',
    'Windows': 'Windows-based',
    'Windows Subsystem for Linux (WSL)': 'Linux-based',
    'Other': 'Otros'
}

In [176]:
# Función modificada para manejar NaN
def split_and_map(value):
    if pd.isna(value):
        return []  # Retorna una lista vacía para NaN
    return [os_map.get(os.strip(), 'Otros') for os in value.split(';')]

# Aplicar la función a la columna
df['OpSysProfessional_Category'] = df['OpSysProfessional use'].apply(split_and_map)

# Usar MultiLabelBinarizer para el one-hot encoding
mlb = MultiLabelBinarizer()
encoded = mlb.fit_transform(df['OpSysProfessional_Category'])

# Crear DataFrame con las columnas codificadas
encoded_df = pd.DataFrame(encoded, columns=mlb.classes_, index=df.index)

# Unir el DataFrame original con las nuevas columnas codificadas
df = pd.concat([df, encoded_df], axis=1)

# Eliminar las columnas originales si ya no las necesitas
df = df.drop(['OpSysProfessional_Category', 'OpSysProfessional use'], axis=1)

# Verificar que no se haya creado una columna 'nan'
if 'nan' in encoded_df.columns:
    df = df.drop('nan', axis=1)

32. OfficeStackAsyncHaveWorkedWith (Herramientas de documentación que has usado): Igual 

33. OfficeStackAsyncWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

34. OfficeStackSyncHaveWorkedWith(Herramientas de comunicación que has usado): Igual

35. OfficeStackSyncWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

36. AISearchDevHaveWorkedWith: Igual

37. AISearchWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitiva

38. AIDevHaveWorkWith: Igual

39. AIDevWantToWorkWith: La elimino, dejo solo la de "HaveWorkedWith" como representativa. Sino se me hacen muchas columnas y muy repetitivas

40. NEWSOSites(Uso de StackOverFlow): No tiene relevancia para mi analisis en concreto, lo elimino. 

41. SOVisitFreq: No tiene relevancia para mi analisis en concreto, lo elimino.

42. SOAccount: No tiene relevancia para mi analisis en concreto, lo elimino.

43. SOPartFreq: No tiene relevancia para mi analisis en concreto, lo elimino.

44. SOComm: No tiene relevancia para mi analisis en concreto, lo elimino.

45. SOAI: No tiene relevancia para mi analisis en concreto, lo elimino.

46. AISelect (Usas IA para tu proceso de desarrollo):
- Yes 
- No, but I plan to soon 
- No, and I don't plan to
Elijo Label encoder con (2) para el Yes.

In [177]:
df['AISelect'].unique()

array(['Yes', 'No, but I plan to soon', "No, and I don't plan to"],
      dtype=object)

In [178]:
# Primero trato los Nan
df['AISelect'] = df['AISelect'].fillna('Other')
print(df['AISelect'].isna().sum())

0


In [179]:
labels60 = {
    "No, and I don't plan to": 0,
    'No, but I plan to soon': 1,
    'Yes': 2,
    'Other': -1
}
df['AISelect'] = df['AISelect'].map(labels60).fillna(-1)

47. AISent (Uso de herramientas de IA como parte de su flujo de trabajo de desarrollo): 
- Very favorable (5) 
- Favorable(4)
- Indifferent (3)
- Unfavorable (2)
- Very unfavorable (1)
- Unsure (0)

LabelEncoder con los valores indicados. 

In [180]:
df['AISent'].unique()

array(['Favorable', 'Indifferent', 'Unsure', nan, 'Very favorable',
       'Unfavorable', 'Very unfavorable'], dtype=object)

In [181]:
# Primero trato los Nan
df['AISent'] = df['AISent'].fillna('Unsure')
print(df['AISent'].isna().sum())

0


In [182]:
labels61 = {
    'Very favorable': 5, 
    'Favorable': 4, 
    'Indifferent': 3, 
    'Unfavorable': 2,
    'Very unfavorable': 1,
    'Unsure': 0
}
df['AISent'] = df['AISent'].map(labels61).fillna(-1)

48. AIAcc (Cuanto confias en el output de la IA como parte de tu trabajo):
- Increase productivity
- Greater efficiency
- Improve collaboration
- Speed up learning
- Improve accuracy in coding
- None of the above

Dividirlo por ';' y luego OneHotEncoder. 

49. AIBen (beneficios importantes espera lograr con la IA): 
- Increase productivity 
- Greater efficiency 
- Make workload more manageable 
- Improve collaboration 
- Speed up learning 
- Improve accuracy in coding 
- None of the above 
- Other 

Dividirlo por ';' y luego OneHotEncoder. 

50. AIToolInterested in Using: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

51. AIToolCurrently Using:
- Learning about a codebase 
- Project planning 
- Writing code 
- Documenting code 
- Debugging and getting help 
- Testing code 
- Committing and reviewing code 
- Deployment and monitoring 
- Predictive analytics 
- Search for answers 
- Generating content or synthetic data 
- Other 

Dividirlo por ';' y luego OneHotEncoder.

52. AIToolNot interested in Using: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

53. AINextVery different: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

54. AINextNeither different nor similar: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

55. AINextSomewhat similar: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

56. AINextVery similar: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

57. AINextSomewhat different: La elimino, dejo solo la de "Currently in using" como representativa. Sino se me hacen muchas columnas y muy repetitivas

58. TBranch: Irrelevante, la elimino

59. ICorPM: Irrelevante, la elimino

60. WorkExp: Los dejo como estan 

61. Knowledge_1: Irrelevante, lo elimino

62. Knowledge_2: Irrelevante, lo elimino

63. Knowledge_3: Irrelevante, lo elimino

64. Knowledge_4: Irrelevante, lo elimino

65. Knowledge_5: Irrelevante, lo elimino

66. Knowledge_6: Irrelevante, lo elimino

67. Knowledge_7: Irrelevante, lo elimino

68. Knowledge_8: Irrelevante, lo elimino

69. Frequency_1: (Qué tan frecuente experimentas ‘Needing help from people outside of your immediate team?‘): LabelEncoder 

In [183]:
# Primero trato los Nan
print(df['Frequency_1'].isna().sum())

402


In [184]:
df['Frequency_1'].unique()

array(['1-2 times a week', 'Never', nan, '10+ times a week',
       '3-5 times a week', '6-10 times a week'], dtype=object)

In [185]:
labels88 = {
    '10+ times a week': 4, 
    '6-10 times a week': 3, 
    '3-5 times a week': 2,
    '1-2 times a week': 1, 
    'Never': 0, 
    'Other': -1
}
median_freq1 = df['Frequency_1'].map(labels88).median()
df['Frequency_1'] = df['Frequency_1'].map(labels88).fillna(median_freq1)

70. Frequency_2 (Qué tan frecuente experimentas ‘Interacting with people outside of your immediate team?’):  LabelEncoder

Mismas categorias que la 88, uso ese labels

In [186]:
df['Frequency_2'].unique()

array(['1-2 times a week', nan, 'Never', '6-10 times a week',
       '3-5 times a week', '10+ times a week'], dtype=object)

In [187]:
# Primero trato los Nan
print(df['Frequency_2'].isna().sum())

404


In [188]:
median_freq2 = df['Frequency_2'].map(labels88).median()
df['Frequency_2'] = df['Frequency_2'].map(labels88).fillna(median_freq2)

71. Frequency_3	(Qué tan frecuente experimentas ‘Encountering knowledge silos (where one individual or team has information that's not shared or distributed with other individuals or teams) at work?’): LabelEncoder

Mismas categorias que la 88, uso ese labels

In [189]:
df['Frequency_3'].unique()

array(['1-2 times a week', 'Never', nan, '6-10 times a week',
       '3-5 times a week', '10+ times a week'], dtype=object)

In [190]:
# Primero trato los Nan
print(df['Frequency_3'].isna().sum())

414


In [191]:
median_freq3 = df['Frequency_3'].map(labels88).median()
df['Frequency_3'] = df['Frequency_3'].map(labels88).fillna(median_freq3)

72. TimeSearching: (Promedio de tiempo que gastas buscando respuesta o solución a algún problema): 
- 'Less than 15 minutes a day': 0,
- '15-30 minutes a day': 1,
- '30-60 minutes a day': 2,
- '60-120 minutes a day': 3,
- 'Over 120 minutes a day': 4

LabelEncoder con los valores asignados

In [192]:
df['TimeSearching'].unique()

array(['Less than 15 minutes a day', '15-30 minutes a day',
       '30-60 minutes a day', nan, '60-120 minutes a day',
       'Over 120 minutes a day'], dtype=object)

In [193]:
# Primero trato los Nan
df['TimeSearching'] = df['TimeSearching'].fillna('IDK')
print(df['TimeSearching'].isna().sum())

0


In [194]:
labels91 = {
'Less than 15 minutes a day': 0,
    '15-30 minutes a day': 1,
    '30-60 minutes a day': 2,
    '60-120 minutes a day': 3,
    'Over 120 minutes a day': 4,
    'IDK': -1
}
df['TimeSearching'] = df['TimeSearching'].map(labels91).fillna(-1)

73. TimeAnswering	



In [195]:
df['TimeAnswering'].unique()

array(['Less than 15 minutes a day', '30-60 minutes a day',
       '15-30 minutes a day', nan, '60-120 minutes a day',
       'Over 120 minutes a day'], dtype=object)

In [196]:
# Primero trato los Nan
df['TimeAnswering'] = df['TimeAnswering'].fillna('IDK')
print(df['TimeAnswering'].isna().sum())

0


In [197]:
df['TimeAnswering'] = df['TimeAnswering'].map(labels91).fillna(-1)

74. ProfessionalTech:  Irrelevante, lo elimino.

75. Industry (Industria de tu compañía):
- Software Development (0)
- Computer Systems Design and Services (0)
- Internet, Telecomm or Information Services (0)
- Fintech (0)
- Energy (3)
- Government (4)
- Banking/Financial Services (1)
- Manufacturing (3)
- Transportation, or Supply Chain (3)
- Healthcare (2)
- Retail and Consumer Services (4)
- Higher Education (2)
- Media & Advertising Services (4)
- Insurance (1)
- Other(4)

Los agrupo y les hago OneHotEncoder. 

In [198]:
df['Industry'].unique()

array(['Information Services, IT, Software Development, or other Technology',
       nan, 'Other', 'Healthcare', 'Retail and Consumer Services',
       'Legal Services', 'Higher Education', 'Financial Services',
       'Advertising Services',
       'Manufacturing, Transportation, or Supply Chain', 'Insurance',
       'Wholesale', 'Oil & Gas'], dtype=object)

In [199]:
# Diccionario de mapeo
industry_map = {
    'Information Services, IT, Software Development, or other Technology': 'Tecnología y Servicios Digitales',
    'Other:': 'Otros Servicios', 
    'Healthcare': 'Salud y Educación',
    'Retail and Consumer Services': 'Otros Servicios',
    'Legal Services': 'Otros Servicios',
    'Higher Education': 'Salud y Educación',
    'Financial Services': 'Servicios Financieros',
    'Advertising Services': 'Otros Servicios',
    'Manufacturing, Transportation, or Supply Chain': 'Industria y Energía',
    'Insurance': 'Servicios Financieros',
    'Wholesale': 'Otros Servicios',
    'Oil & Gas': 'Industria y Energía'
}

# Función para manejar NaN y aplicar el mapeo
def map_industry(value):
    if pd.isna(value):
        return 'Desconocido'
    return industry_map.get(value, 'Otros Servicios')

# Suponiendo que tienes un DataFrame llamado 'df' con una columna 'Industry'
# Aplicamos el mapeo a la columna 'Industry'

most_frequent_industry = df['Industry'].map(industry_map).mode()[0]
df['Industry_Category'] = df['Industry'].map(industry_map).fillna(most_frequent_industry)
# Creamos el OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)

# Aplicamos el OneHotEncoder
encoded = encoder.fit_transform(df[['Industry_Category']])

# Creamos un DataFrame con las columnas codificadas
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(['Industry_Category']), index=df.index)

# Unimos el DataFrame original con las nuevas columnas codificadas
df = pd.concat([df, encoded_df], axis=1)

# Eliminamos la columna temporal 'Industry_Category' si ya no la necesitas
df = df.drop(['Industry_Category', 'Industry'], axis=1)

Por la forma en que OneHotEncoder trata los Nan, se me siguen creando columnas con Industry_Category_nan. Simplemente la Elimino y esa fila quedara sin valores

Creo la funcion para aplicar OneHotEncoder a todas las columnas que quedan, a las que primero tengo que tratar haciendo split de las ;. 

In [200]:
def process_and_encode(df, columns):
    for column in columns:
        # Separamos las categorías por el delimitador ";"
        df[column] = df[column].fillna('').str.split(';')

        # Creamos un objeto MultiLabelBinarizer
        mlb = MultiLabelBinarizer()

        # Aplicamos MultiLabelBinarizer a las categorías separadas
        encoded_values = mlb.fit_transform(df[column])

        # Creamos un DataFrame con los valores codificados
        encoded_df = pd.DataFrame(encoded_values, columns=[f"{column}_{c}" for c in mlb.classes_], index=df.index)

        # Añadimos los resultados al DataFrame copiado
        df = pd.concat([df, encoded_df], axis=1)

        # Eliminamos la columna original del DataFrame copiado
        df.drop(column, axis=1, inplace=True)

    return df

Los Nan que existan en estas columnas he decidido dejarlos, y cuando se aplique el OneHotEncoder se pondran todos 0 en las columnas que agrega. En caso de que no sea asi, eliminare las columnas que sean nan

In [201]:
# Supongamos que estas son las columnas que contienen valores separados por ';'
columns_to_encode = ['BuyNewTool',
    'LanguageHaveWorkedWith', 'LanguageWantToWorkWith', 'DatabaseHaveWorkedWith', 'PlatformHaveWorkedWith', 'WebframeHaveWorkedWith', 
    'MiscTechHaveWorkedWith', 'ToolsTechHaveWorkedWith', 'NEWCollabToolsHaveWorkedWith', 'OfficeStackAsyncHaveWorkedWith', 'OfficeStackSyncHaveWorkedWith',
    'AISearchHaveWorkedWith', 'AIDevHaveWorkedWith', 'AIAcc',
    'AIBen', 'AIToolCurrently Using'
    ]  # Reemplaza con los nombres reales de las columnas


# Aplicamos la función al DataFrame
df = process_and_encode(df, columns_to_encode)

76. SurveyLength:

78. SurveyEase: Irrelevante, lo elimino.

In [202]:
columns_to_drop = [ 'DatabaseWantToWorkWith', 'PlatformWantToWorkWith', 'WebframeWantToWorkWith', 'MiscTechWantToWorkWith', 'ToolsTechWantToWorkWith', 'NEWCollabToolsWantToWorkWith', 'OpSysPersonal use',
    'OfficeStackAsyncWantToWorkWith', 'OfficeStackSyncWantToWorkWith', 'AISearchWantToWorkWith', 'AIDevWantToWorkWith',
    'NEWSOSites', 'SOVisitFreq', 'SOAccount', 'SOPartFreq', 'SOComm', 'SOAI',
    'AIToolInterested in Using', 'AIToolNot interested in Using', 'AINextVery different', 
    'AINextNeither different nor similar', 'AINextSomewhat similar', 'AINextVery similar', 'AINextSomewhat different',
    'TBranch', 'ICorPM', 'WorkExp', 
    'Knowledge_1', 'Knowledge_2', 'Knowledge_3', 'Knowledge_4', 'Knowledge_5', 'Knowledge_6', 'Knowledge_7', 'Knowledge_8', 
    'ProfessionalTech', 'SurveyLength', 'SurveyEase'
]

In [203]:
df = df.drop(columns_to_drop, axis=1)

In [204]:
for col in df.columns:
    print(col)

MainBranch
EdLevel
YearsCode
YearsCodePro
PurchaseInfluence
CompTotal
AISelect
AISent
Frequency_1
Frequency_2
Frequency_3
TimeSearching
TimeAnswering
Age_Grouped_Adulto
Age_Grouped_Joven
Age_Grouped_Senior
is_full_time
is_part_time
is_independent
num_jobs
is_other_employment
Remote_grouped_In-person
Remote_grouped_Remote/Hybrid
CodingActivities_encoded
LearnCode_encoded
LearnCodeOnline_encoded
DevType_encoded
OrgSize_Grouped_Desconocido
OrgSize_Grouped_Grande
OrgSize_Grouped_Mediana
OrgSize_Grouped_Muy Pequeña
OrgSize_Grouped_Pequeña
Android-based
Linux-based
MacOS
Otros
Windows-based
Industry_Category_Industria y Energía
Industry_Category_Otros Servicios
Industry_Category_Salud y Educación
Industry_Category_Servicios Financieros
Industry_Category_Tecnología y Servicios Digitales
BuyNewTool_Ask a generative AI tool
BuyNewTool_Ask developers I know/work with
BuyNewTool_Other
BuyNewTool_Other (please specify):
BuyNewTool_Read ratings or reviews on third party sites like G2 Crowd
BuyNewTo

In [205]:
df.head()

Unnamed: 0,MainBranch,EdLevel,YearsCode,YearsCodePro,PurchaseInfluence,CompTotal,AISelect,AISent,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Age_Grouped_Adulto,Age_Grouped_Joven,Age_Grouped_Senior,is_full_time,is_part_time,is_independent,num_jobs,is_other_employment,Remote_grouped_In-person,Remote_grouped_Remote/Hybrid,CodingActivities_encoded,LearnCode_encoded,LearnCodeOnline_encoded,DevType_encoded,OrgSize_Grouped_Desconocido,OrgSize_Grouped_Grande,OrgSize_Grouped_Mediana,OrgSize_Grouped_Muy Pequeña,OrgSize_Grouped_Pequeña,Android-based,Linux-based,MacOS,Otros,Windows-based,Industry_Category_Industria y Energía,Industry_Category_Otros Servicios,Industry_Category_Salud y Educación,Industry_Category_Servicios Financieros,Industry_Category_Tecnología y Servicios Digitales,BuyNewTool_Ask a generative AI tool,BuyNewTool_Ask developers I know/work with,BuyNewTool_Other,BuyNewTool_Other (please specify):,BuyNewTool_Read ratings or reviews on third party sites like G2 Crowd,BuyNewTool_Research companies that have advertised on sites I visit,BuyNewTool_Research companies that have emailed me,BuyNewTool_Start a free trial,BuyNewTool_Visit developer communities like Stack Overflow,LanguageHaveWorkedWith_,LanguageHaveWorkedWith_Ada,LanguageHaveWorkedWith_Apex,LanguageHaveWorkedWith_Assembly,LanguageHaveWorkedWith_Bash/Shell (all shells),LanguageHaveWorkedWith_C,LanguageHaveWorkedWith_C#,LanguageHaveWorkedWith_C++,LanguageHaveWorkedWith_Clojure,LanguageHaveWorkedWith_Cobol,LanguageHaveWorkedWith_Crystal,LanguageHaveWorkedWith_Dart,LanguageHaveWorkedWith_Delphi,LanguageHaveWorkedWith_Elixir,LanguageHaveWorkedWith_Erlang,LanguageHaveWorkedWith_F#,LanguageHaveWorkedWith_Flow,LanguageHaveWorkedWith_Fortran,LanguageHaveWorkedWith_GDScript,LanguageHaveWorkedWith_Go,LanguageHaveWorkedWith_Groovy,LanguageHaveWorkedWith_HTML/CSS,LanguageHaveWorkedWith_Haskell,LanguageHaveWorkedWith_Java,LanguageHaveWorkedWith_JavaScript,LanguageHaveWorkedWith_Julia,LanguageHaveWorkedWith_Kotlin,LanguageHaveWorkedWith_Lisp,LanguageHaveWorkedWith_Lua,LanguageHaveWorkedWith_MATLAB,LanguageHaveWorkedWith_Nim,LanguageHaveWorkedWith_OCaml,LanguageHaveWorkedWith_Objective-C,LanguageHaveWorkedWith_PHP,LanguageHaveWorkedWith_Perl,LanguageHaveWorkedWith_PowerShell,LanguageHaveWorkedWith_Prolog,LanguageHaveWorkedWith_Python,LanguageHaveWorkedWith_R,LanguageHaveWorkedWith_Raku,LanguageHaveWorkedWith_Ruby,LanguageHaveWorkedWith_Rust,LanguageHaveWorkedWith_SAS,LanguageHaveWorkedWith_SQL,LanguageHaveWorkedWith_Scala,LanguageHaveWorkedWith_Solidity,LanguageHaveWorkedWith_Swift,LanguageHaveWorkedWith_TypeScript,LanguageHaveWorkedWith_VBA,LanguageHaveWorkedWith_Visual Basic (.Net),LanguageHaveWorkedWith_Zig,LanguageWantToWorkWith_,LanguageWantToWorkWith_APL,LanguageWantToWorkWith_Ada,LanguageWantToWorkWith_Apex,LanguageWantToWorkWith_Assembly,LanguageWantToWorkWith_Bash/Shell (all shells),LanguageWantToWorkWith_C,LanguageWantToWorkWith_C#,LanguageWantToWorkWith_C++,LanguageWantToWorkWith_Clojure,LanguageWantToWorkWith_Cobol,LanguageWantToWorkWith_Crystal,LanguageWantToWorkWith_Dart,LanguageWantToWorkWith_Delphi,LanguageWantToWorkWith_Elixir,LanguageWantToWorkWith_Erlang,LanguageWantToWorkWith_F#,LanguageWantToWorkWith_Flow,LanguageWantToWorkWith_Fortran,LanguageWantToWorkWith_GDScript,LanguageWantToWorkWith_Go,LanguageWantToWorkWith_Groovy,LanguageWantToWorkWith_HTML/CSS,LanguageWantToWorkWith_Haskell,LanguageWantToWorkWith_Java,LanguageWantToWorkWith_JavaScript,LanguageWantToWorkWith_Julia,LanguageWantToWorkWith_Kotlin,LanguageWantToWorkWith_Lisp,LanguageWantToWorkWith_Lua,LanguageWantToWorkWith_MATLAB,LanguageWantToWorkWith_Nim,LanguageWantToWorkWith_OCaml,LanguageWantToWorkWith_Objective-C,LanguageWantToWorkWith_PHP,LanguageWantToWorkWith_Perl,LanguageWantToWorkWith_PowerShell,LanguageWantToWorkWith_Prolog,LanguageWantToWorkWith_Python,LanguageWantToWorkWith_R,LanguageWantToWorkWith_Raku,LanguageWantToWorkWith_Ruby,LanguageWantToWorkWith_Rust,LanguageWantToWorkWith_SAS,LanguageWantToWorkWith_SQL,LanguageWantToWorkWith_Scala,LanguageWantToWorkWith_Solidity,LanguageWantToWorkWith_Swift,LanguageWantToWorkWith_TypeScript,LanguageWantToWorkWith_VBA,LanguageWantToWorkWith_Visual Basic (.Net),LanguageWantToWorkWith_Zig,DatabaseHaveWorkedWith_,DatabaseHaveWorkedWith_BigQuery,DatabaseHaveWorkedWith_Cassandra,DatabaseHaveWorkedWith_Clickhouse,DatabaseHaveWorkedWith_Cloud Firestore,DatabaseHaveWorkedWith_Cockroachdb,DatabaseHaveWorkedWith_Cosmos DB,DatabaseHaveWorkedWith_Couch DB,DatabaseHaveWorkedWith_Couchbase,DatabaseHaveWorkedWith_Datomic,DatabaseHaveWorkedWith_DuckDB,DatabaseHaveWorkedWith_Dynamodb,DatabaseHaveWorkedWith_Elasticsearch,DatabaseHaveWorkedWith_Firebase Realtime Database,DatabaseHaveWorkedWith_Firebird,DatabaseHaveWorkedWith_H2,DatabaseHaveWorkedWith_IBM DB2,DatabaseHaveWorkedWith_InfluxDB,DatabaseHaveWorkedWith_MariaDB,DatabaseHaveWorkedWith_Microsoft Access,DatabaseHaveWorkedWith_Microsoft SQL Server,DatabaseHaveWorkedWith_MongoDB,DatabaseHaveWorkedWith_MySQL,DatabaseHaveWorkedWith_Neo4J,DatabaseHaveWorkedWith_Oracle,DatabaseHaveWorkedWith_PostgreSQL,DatabaseHaveWorkedWith_RavenDB,DatabaseHaveWorkedWith_Redis,DatabaseHaveWorkedWith_SQLite,DatabaseHaveWorkedWith_Snowflake,DatabaseHaveWorkedWith_Solr,DatabaseHaveWorkedWith_Supabase,PlatformHaveWorkedWith_,PlatformHaveWorkedWith_Amazon Web Services (AWS),PlatformHaveWorkedWith_Cloudflare,PlatformHaveWorkedWith_Colocation,PlatformHaveWorkedWith_Digital Ocean,PlatformHaveWorkedWith_Firebase,PlatformHaveWorkedWith_Fly.io,PlatformHaveWorkedWith_Google Cloud,PlatformHaveWorkedWith_Heroku,PlatformHaveWorkedWith_Hetzner,PlatformHaveWorkedWith_IBM Cloud Or Watson,"PlatformHaveWorkedWith_Linode, now Akamai",PlatformHaveWorkedWith_Managed Hosting,PlatformHaveWorkedWith_Microsoft Azure,PlatformHaveWorkedWith_Netlify,PlatformHaveWorkedWith_OVH,PlatformHaveWorkedWith_OpenShift,PlatformHaveWorkedWith_OpenStack,PlatformHaveWorkedWith_Oracle Cloud Infrastructure (OCI),PlatformHaveWorkedWith_Render,PlatformHaveWorkedWith_Scaleway,PlatformHaveWorkedWith_VMware,PlatformHaveWorkedWith_Vercel,PlatformHaveWorkedWith_Vultr,WebframeHaveWorkedWith_,WebframeHaveWorkedWith_ASP.NET,WebframeHaveWorkedWith_ASP.NET CORE,WebframeHaveWorkedWith_Angular,WebframeHaveWorkedWith_AngularJS,WebframeHaveWorkedWith_Blazor,WebframeHaveWorkedWith_CodeIgniter,WebframeHaveWorkedWith_Deno,WebframeHaveWorkedWith_Django,WebframeHaveWorkedWith_Drupal,WebframeHaveWorkedWith_Elm,WebframeHaveWorkedWith_Express,WebframeHaveWorkedWith_FastAPI,WebframeHaveWorkedWith_Fastify,WebframeHaveWorkedWith_Flask,WebframeHaveWorkedWith_Gatsby,WebframeHaveWorkedWith_Laravel,WebframeHaveWorkedWith_Lit,WebframeHaveWorkedWith_NestJS,WebframeHaveWorkedWith_Next.js,WebframeHaveWorkedWith_Node.js,WebframeHaveWorkedWith_Nuxt.js,WebframeHaveWorkedWith_Phoenix,WebframeHaveWorkedWith_Play Framework,WebframeHaveWorkedWith_Qwik,WebframeHaveWorkedWith_React,WebframeHaveWorkedWith_Remix,WebframeHaveWorkedWith_Ruby on Rails,WebframeHaveWorkedWith_Solid.js,WebframeHaveWorkedWith_Spring Boot,WebframeHaveWorkedWith_Svelte,WebframeHaveWorkedWith_Symfony,WebframeHaveWorkedWith_Vue.js,WebframeHaveWorkedWith_WordPress,WebframeHaveWorkedWith_jQuery,MiscTechHaveWorkedWith_,MiscTechHaveWorkedWith_.NET (5+),MiscTechHaveWorkedWith_.NET Framework (1.0 - 4.8),MiscTechHaveWorkedWith_.NET MAUI,MiscTechHaveWorkedWith_Apache Kafka,MiscTechHaveWorkedWith_Apache Spark,MiscTechHaveWorkedWith_CUDA,MiscTechHaveWorkedWith_Capacitor,MiscTechHaveWorkedWith_Cordova,MiscTechHaveWorkedWith_Electron,MiscTechHaveWorkedWith_Flutter,MiscTechHaveWorkedWith_GTK,MiscTechHaveWorkedWith_Hadoop,MiscTechHaveWorkedWith_Hugging Face Transformers,MiscTechHaveWorkedWith_Ionic,MiscTechHaveWorkedWith_JAX,MiscTechHaveWorkedWith_Keras,MiscTechHaveWorkedWith_Ktor,MiscTechHaveWorkedWith_MFC,MiscTechHaveWorkedWith_Micronaut,MiscTechHaveWorkedWith_NumPy,MiscTechHaveWorkedWith_OpenGL,MiscTechHaveWorkedWith_Opencv,MiscTechHaveWorkedWith_Pandas,MiscTechHaveWorkedWith_Qt,MiscTechHaveWorkedWith_Quarkus,MiscTechHaveWorkedWith_RabbitMQ,MiscTechHaveWorkedWith_React Native,MiscTechHaveWorkedWith_Scikit-Learn,MiscTechHaveWorkedWith_Spring Framework,MiscTechHaveWorkedWith_SwiftUI,MiscTechHaveWorkedWith_Tauri,MiscTechHaveWorkedWith_TensorFlow,MiscTechHaveWorkedWith_Tidyverse,MiscTechHaveWorkedWith_Torch/PyTorch,MiscTechHaveWorkedWith_Uno Platform,MiscTechHaveWorkedWith_Xamarin,ToolsTechHaveWorkedWith_,ToolsTechHaveWorkedWith_APT,ToolsTechHaveWorkedWith_Ansible,ToolsTechHaveWorkedWith_Ant,ToolsTechHaveWorkedWith_Bun,ToolsTechHaveWorkedWith_CMake,ToolsTechHaveWorkedWith_Cargo,ToolsTechHaveWorkedWith_Catch2,ToolsTechHaveWorkedWith_Chef,ToolsTechHaveWorkedWith_Chocolatey,ToolsTechHaveWorkedWith_Composer,ToolsTechHaveWorkedWith_Dagger,ToolsTechHaveWorkedWith_Docker,ToolsTechHaveWorkedWith_GNU GCC,ToolsTechHaveWorkedWith_Godot,ToolsTechHaveWorkedWith_Google Test,ToolsTechHaveWorkedWith_Gradle,ToolsTechHaveWorkedWith_Homebrew,ToolsTechHaveWorkedWith_Kubernetes,ToolsTechHaveWorkedWith_LLVM's Clang,ToolsTechHaveWorkedWith_MSBuild,ToolsTechHaveWorkedWith_MSVC,ToolsTechHaveWorkedWith_Make,ToolsTechHaveWorkedWith_Maven (build tool),ToolsTechHaveWorkedWith_Meson,ToolsTechHaveWorkedWith_Ninja,ToolsTechHaveWorkedWith_Nix,ToolsTechHaveWorkedWith_NuGet,ToolsTechHaveWorkedWith_Pacman,ToolsTechHaveWorkedWith_Pip,ToolsTechHaveWorkedWith_Podman,ToolsTechHaveWorkedWith_Pulumi,ToolsTechHaveWorkedWith_Puppet,ToolsTechHaveWorkedWith_QMake,ToolsTechHaveWorkedWith_SCons,ToolsTechHaveWorkedWith_Terraform,ToolsTechHaveWorkedWith_Unity 3D,ToolsTechHaveWorkedWith_Unreal Engine,ToolsTechHaveWorkedWith_Visual Studio Solution,ToolsTechHaveWorkedWith_Vite,ToolsTechHaveWorkedWith_Wasmer,ToolsTechHaveWorkedWith_Webpack,ToolsTechHaveWorkedWith_Yarn,ToolsTechHaveWorkedWith_bandit,ToolsTechHaveWorkedWith_build2,ToolsTechHaveWorkedWith_cppunit,ToolsTechHaveWorkedWith_doctest,ToolsTechHaveWorkedWith_lest,ToolsTechHaveWorkedWith_npm,ToolsTechHaveWorkedWith_pnpm,NEWCollabToolsHaveWorkedWith_,NEWCollabToolsHaveWorkedWith_Android Studio,NEWCollabToolsHaveWorkedWith_Atom,NEWCollabToolsHaveWorkedWith_BBEdit,NEWCollabToolsHaveWorkedWith_CLion,NEWCollabToolsHaveWorkedWith_Code::Blocks,NEWCollabToolsHaveWorkedWith_DataGrip,NEWCollabToolsHaveWorkedWith_Eclipse,NEWCollabToolsHaveWorkedWith_Emacs,NEWCollabToolsHaveWorkedWith_Fleet,NEWCollabToolsHaveWorkedWith_Geany,NEWCollabToolsHaveWorkedWith_Goland,NEWCollabToolsHaveWorkedWith_Helix,NEWCollabToolsHaveWorkedWith_IPython,NEWCollabToolsHaveWorkedWith_IntelliJ IDEA,NEWCollabToolsHaveWorkedWith_Jupyter Notebook/JupyterLab,NEWCollabToolsHaveWorkedWith_Kate,NEWCollabToolsHaveWorkedWith_Micro,NEWCollabToolsHaveWorkedWith_Nano,NEWCollabToolsHaveWorkedWith_Neovim,NEWCollabToolsHaveWorkedWith_Netbeans,NEWCollabToolsHaveWorkedWith_Notepad++,NEWCollabToolsHaveWorkedWith_Nova,NEWCollabToolsHaveWorkedWith_PhpStorm,NEWCollabToolsHaveWorkedWith_PyCharm,NEWCollabToolsHaveWorkedWith_Qt Creator,NEWCollabToolsHaveWorkedWith_RStudio,"NEWCollabToolsHaveWorkedWith_Rad Studio (Delphi, C++ Builder)",NEWCollabToolsHaveWorkedWith_Rider,NEWCollabToolsHaveWorkedWith_RubyMine,NEWCollabToolsHaveWorkedWith_Spyder,NEWCollabToolsHaveWorkedWith_Sublime Text,NEWCollabToolsHaveWorkedWith_TextMate,NEWCollabToolsHaveWorkedWith_VSCodium,NEWCollabToolsHaveWorkedWith_Vim,NEWCollabToolsHaveWorkedWith_Visual Studio,NEWCollabToolsHaveWorkedWith_Visual Studio Code,NEWCollabToolsHaveWorkedWith_WebStorm,NEWCollabToolsHaveWorkedWith_Xcode,NEWCollabToolsHaveWorkedWith_condo,OfficeStackAsyncHaveWorkedWith_,OfficeStackAsyncHaveWorkedWith_Adobe Workfront,OfficeStackAsyncHaveWorkedWith_Airtable,OfficeStackAsyncHaveWorkedWith_Asana,OfficeStackAsyncHaveWorkedWith_Azure Devops,OfficeStackAsyncHaveWorkedWith_Basecamp,OfficeStackAsyncHaveWorkedWith_Clickup,OfficeStackAsyncHaveWorkedWith_Confluence,OfficeStackAsyncHaveWorkedWith_Doxygen,OfficeStackAsyncHaveWorkedWith_GitHub Discussions,OfficeStackAsyncHaveWorkedWith_Jira,OfficeStackAsyncHaveWorkedWith_Linear,OfficeStackAsyncHaveWorkedWith_Markdown File,OfficeStackAsyncHaveWorkedWith_Microsoft Lists,OfficeStackAsyncHaveWorkedWith_Microsoft Planner,OfficeStackAsyncHaveWorkedWith_Miro,OfficeStackAsyncHaveWorkedWith_Monday.com,OfficeStackAsyncHaveWorkedWith_Notion,OfficeStackAsyncHaveWorkedWith_Nuclino,OfficeStackAsyncHaveWorkedWith_Planview Projectplace Or Clarizen,OfficeStackAsyncHaveWorkedWith_Redmine,OfficeStackAsyncHaveWorkedWith_Redocly,OfficeStackAsyncHaveWorkedWith_Shortcut,OfficeStackAsyncHaveWorkedWith_Smartsheet,OfficeStackAsyncHaveWorkedWith_Stack Overflow for Teams,OfficeStackAsyncHaveWorkedWith_Swit,OfficeStackAsyncHaveWorkedWith_Trello,OfficeStackAsyncHaveWorkedWith_Wikis,OfficeStackAsyncHaveWorkedWith_Wrike,OfficeStackAsyncHaveWorkedWith_YouTrack,OfficeStackSyncHaveWorkedWith_,OfficeStackSyncHaveWorkedWith_Cisco Webex Teams,OfficeStackSyncHaveWorkedWith_Discord,OfficeStackSyncHaveWorkedWith_Google Chat,OfficeStackSyncHaveWorkedWith_Google Meet,OfficeStackSyncHaveWorkedWith_IRC,OfficeStackSyncHaveWorkedWith_Jitsi,OfficeStackSyncHaveWorkedWith_Matrix,OfficeStackSyncHaveWorkedWith_Mattermost,OfficeStackSyncHaveWorkedWith_Microsoft Teams,OfficeStackSyncHaveWorkedWith_Ringcentral,OfficeStackSyncHaveWorkedWith_Rocketchat,OfficeStackSyncHaveWorkedWith_Signal,OfficeStackSyncHaveWorkedWith_Skype,OfficeStackSyncHaveWorkedWith_Slack,OfficeStackSyncHaveWorkedWith_Symphony,OfficeStackSyncHaveWorkedWith_Telegram,OfficeStackSyncHaveWorkedWith_Unify Circuit,OfficeStackSyncHaveWorkedWith_Whatsapp,OfficeStackSyncHaveWorkedWith_Wickr,OfficeStackSyncHaveWorkedWith_Wire,OfficeStackSyncHaveWorkedWith_Zoom,OfficeStackSyncHaveWorkedWith_Zulip,AISearchHaveWorkedWith_,AISearchHaveWorkedWith_Andi,AISearchHaveWorkedWith_Bing AI,AISearchHaveWorkedWith_ChatGPT,AISearchHaveWorkedWith_Google Bard AI,AISearchHaveWorkedWith_Perplexity AI,AISearchHaveWorkedWith_Phind,AISearchHaveWorkedWith_Quora Poe,AISearchHaveWorkedWith_WolframAlpha,AISearchHaveWorkedWith_You.com,AIDevHaveWorkedWith_,AIDevHaveWorkedWith_AWS CodeWhisperer,AIDevHaveWorkedWith_Codeium,AIDevHaveWorkedWith_GitHub Copilot,AIDevHaveWorkedWith_Synk Code,AIDevHaveWorkedWith_Tabnine,AIDevHaveWorkedWith_Whispr AI,AIAcc_,AIAcc_Greater efficiency,AIAcc_Improve accuracy in coding,AIAcc_Improve collaboration,AIAcc_Increase productivity,AIAcc_Other (please explain),AIAcc_Speed up learning,AIBen_,AIBen_Highly distrust,AIBen_Highly trust,AIBen_Neither trust nor distrust,AIBen_Somewhat distrust,AIBen_Somewhat trust,AIToolCurrently Using_,AIToolCurrently Using_Collaborating with teammates,AIToolCurrently Using_Committing and reviewing code,AIToolCurrently Using_Debugging and getting help,AIToolCurrently Using_Deployment and monitoring,AIToolCurrently Using_Documenting code,AIToolCurrently Using_Learning about a codebase,AIToolCurrently Using_Other (please describe),AIToolCurrently Using_Project planning,AIToolCurrently Using_Testing code,AIToolCurrently Using_Writing code
0,5,5,0,10,0,35000.0,2,4,1.0,1.0,1.0,0,0,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,50361.0,52973.549052,53507.215094,55708.823657,0.0,0.0,0.0,1.0,0.0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1
1,5,3,15,15,0,70000.0,1,3,1.0,1.0,1.0,1,2,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,52341.025641,54852.357875,53507.215094,55708.823657,0.0,1.0,0.0,0.0,0.0,0,0,1,0,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
2,5,4,5,3,0,25000.0,1,0,0.0,1.0,0.0,2,1,0.0,1.0,0.0,1,0,0,1,0,0.0,1.0,50908.278146,54453.590602,50500.0,53536.351028,0.0,1.0,0.0,0.0,0.0,0,1,0,0,1,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
3,5,4,4,2,0,35000.0,2,3,1.0,1.0,1.0,-1,-1,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,58952.191011,54870.263257,39500.0,52893.454509,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0.0,0.0,0.0,0.0,1.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,6,15,10,0,42000.0,2,4,1.0,1.0,1.0,-1,-1,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,67207.588235,53515.451768,42000.0,96000.0,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1


In [206]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1150 entries, 0 to 1149
Columns: 466 entries, MainBranch to AIToolCurrently Using_Writing code
dtypes: float64(23), int32(436), int64(7)
memory usage: 2.2 MB


In [207]:
# Exporto este df final, para hacer concat con el de 2024 y trabajar con uno solo en el modelo. 
# Construir la ruta relativa a la carpeta Pickles
ruta_pickles = os.path.join("..", "Pickles")

# Asegurarse de que la carpeta Pickles exista
os.makedirs(ruta_pickles, exist_ok=True)

# Guardar los DataFrames en formato pickle en la carpeta Pickles
with open(os.path.join(ruta_pickles, 'data_23.pickle'), 'wb') as archivo:
    pickle.dump(df, archivo)

In [208]:
df

Unnamed: 0,MainBranch,EdLevel,YearsCode,YearsCodePro,PurchaseInfluence,CompTotal,AISelect,AISent,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Age_Grouped_Adulto,Age_Grouped_Joven,Age_Grouped_Senior,is_full_time,is_part_time,is_independent,num_jobs,is_other_employment,Remote_grouped_In-person,Remote_grouped_Remote/Hybrid,CodingActivities_encoded,LearnCode_encoded,LearnCodeOnline_encoded,DevType_encoded,OrgSize_Grouped_Desconocido,OrgSize_Grouped_Grande,OrgSize_Grouped_Mediana,OrgSize_Grouped_Muy Pequeña,OrgSize_Grouped_Pequeña,Android-based,Linux-based,MacOS,Otros,Windows-based,Industry_Category_Industria y Energía,Industry_Category_Otros Servicios,Industry_Category_Salud y Educación,Industry_Category_Servicios Financieros,Industry_Category_Tecnología y Servicios Digitales,BuyNewTool_Ask a generative AI tool,BuyNewTool_Ask developers I know/work with,BuyNewTool_Other,BuyNewTool_Other (please specify):,BuyNewTool_Read ratings or reviews on third party sites like G2 Crowd,BuyNewTool_Research companies that have advertised on sites I visit,BuyNewTool_Research companies that have emailed me,BuyNewTool_Start a free trial,BuyNewTool_Visit developer communities like Stack Overflow,LanguageHaveWorkedWith_,LanguageHaveWorkedWith_Ada,LanguageHaveWorkedWith_Apex,LanguageHaveWorkedWith_Assembly,LanguageHaveWorkedWith_Bash/Shell (all shells),LanguageHaveWorkedWith_C,LanguageHaveWorkedWith_C#,LanguageHaveWorkedWith_C++,LanguageHaveWorkedWith_Clojure,LanguageHaveWorkedWith_Cobol,LanguageHaveWorkedWith_Crystal,LanguageHaveWorkedWith_Dart,LanguageHaveWorkedWith_Delphi,LanguageHaveWorkedWith_Elixir,LanguageHaveWorkedWith_Erlang,LanguageHaveWorkedWith_F#,LanguageHaveWorkedWith_Flow,LanguageHaveWorkedWith_Fortran,LanguageHaveWorkedWith_GDScript,LanguageHaveWorkedWith_Go,LanguageHaveWorkedWith_Groovy,LanguageHaveWorkedWith_HTML/CSS,LanguageHaveWorkedWith_Haskell,LanguageHaveWorkedWith_Java,LanguageHaveWorkedWith_JavaScript,LanguageHaveWorkedWith_Julia,LanguageHaveWorkedWith_Kotlin,LanguageHaveWorkedWith_Lisp,LanguageHaveWorkedWith_Lua,LanguageHaveWorkedWith_MATLAB,LanguageHaveWorkedWith_Nim,LanguageHaveWorkedWith_OCaml,LanguageHaveWorkedWith_Objective-C,LanguageHaveWorkedWith_PHP,LanguageHaveWorkedWith_Perl,LanguageHaveWorkedWith_PowerShell,LanguageHaveWorkedWith_Prolog,LanguageHaveWorkedWith_Python,LanguageHaveWorkedWith_R,LanguageHaveWorkedWith_Raku,LanguageHaveWorkedWith_Ruby,LanguageHaveWorkedWith_Rust,LanguageHaveWorkedWith_SAS,LanguageHaveWorkedWith_SQL,LanguageHaveWorkedWith_Scala,LanguageHaveWorkedWith_Solidity,LanguageHaveWorkedWith_Swift,LanguageHaveWorkedWith_TypeScript,LanguageHaveWorkedWith_VBA,LanguageHaveWorkedWith_Visual Basic (.Net),LanguageHaveWorkedWith_Zig,LanguageWantToWorkWith_,LanguageWantToWorkWith_APL,LanguageWantToWorkWith_Ada,LanguageWantToWorkWith_Apex,LanguageWantToWorkWith_Assembly,LanguageWantToWorkWith_Bash/Shell (all shells),LanguageWantToWorkWith_C,LanguageWantToWorkWith_C#,LanguageWantToWorkWith_C++,LanguageWantToWorkWith_Clojure,LanguageWantToWorkWith_Cobol,LanguageWantToWorkWith_Crystal,LanguageWantToWorkWith_Dart,LanguageWantToWorkWith_Delphi,LanguageWantToWorkWith_Elixir,LanguageWantToWorkWith_Erlang,LanguageWantToWorkWith_F#,LanguageWantToWorkWith_Flow,LanguageWantToWorkWith_Fortran,LanguageWantToWorkWith_GDScript,LanguageWantToWorkWith_Go,LanguageWantToWorkWith_Groovy,LanguageWantToWorkWith_HTML/CSS,LanguageWantToWorkWith_Haskell,LanguageWantToWorkWith_Java,LanguageWantToWorkWith_JavaScript,LanguageWantToWorkWith_Julia,LanguageWantToWorkWith_Kotlin,LanguageWantToWorkWith_Lisp,LanguageWantToWorkWith_Lua,LanguageWantToWorkWith_MATLAB,LanguageWantToWorkWith_Nim,LanguageWantToWorkWith_OCaml,LanguageWantToWorkWith_Objective-C,LanguageWantToWorkWith_PHP,LanguageWantToWorkWith_Perl,LanguageWantToWorkWith_PowerShell,LanguageWantToWorkWith_Prolog,LanguageWantToWorkWith_Python,LanguageWantToWorkWith_R,LanguageWantToWorkWith_Raku,LanguageWantToWorkWith_Ruby,LanguageWantToWorkWith_Rust,LanguageWantToWorkWith_SAS,LanguageWantToWorkWith_SQL,LanguageWantToWorkWith_Scala,LanguageWantToWorkWith_Solidity,LanguageWantToWorkWith_Swift,LanguageWantToWorkWith_TypeScript,LanguageWantToWorkWith_VBA,LanguageWantToWorkWith_Visual Basic (.Net),LanguageWantToWorkWith_Zig,DatabaseHaveWorkedWith_,DatabaseHaveWorkedWith_BigQuery,DatabaseHaveWorkedWith_Cassandra,DatabaseHaveWorkedWith_Clickhouse,DatabaseHaveWorkedWith_Cloud Firestore,DatabaseHaveWorkedWith_Cockroachdb,DatabaseHaveWorkedWith_Cosmos DB,DatabaseHaveWorkedWith_Couch DB,DatabaseHaveWorkedWith_Couchbase,DatabaseHaveWorkedWith_Datomic,DatabaseHaveWorkedWith_DuckDB,DatabaseHaveWorkedWith_Dynamodb,DatabaseHaveWorkedWith_Elasticsearch,DatabaseHaveWorkedWith_Firebase Realtime Database,DatabaseHaveWorkedWith_Firebird,DatabaseHaveWorkedWith_H2,DatabaseHaveWorkedWith_IBM DB2,DatabaseHaveWorkedWith_InfluxDB,DatabaseHaveWorkedWith_MariaDB,DatabaseHaveWorkedWith_Microsoft Access,DatabaseHaveWorkedWith_Microsoft SQL Server,DatabaseHaveWorkedWith_MongoDB,DatabaseHaveWorkedWith_MySQL,DatabaseHaveWorkedWith_Neo4J,DatabaseHaveWorkedWith_Oracle,DatabaseHaveWorkedWith_PostgreSQL,DatabaseHaveWorkedWith_RavenDB,DatabaseHaveWorkedWith_Redis,DatabaseHaveWorkedWith_SQLite,DatabaseHaveWorkedWith_Snowflake,DatabaseHaveWorkedWith_Solr,DatabaseHaveWorkedWith_Supabase,PlatformHaveWorkedWith_,PlatformHaveWorkedWith_Amazon Web Services (AWS),PlatformHaveWorkedWith_Cloudflare,PlatformHaveWorkedWith_Colocation,PlatformHaveWorkedWith_Digital Ocean,PlatformHaveWorkedWith_Firebase,PlatformHaveWorkedWith_Fly.io,PlatformHaveWorkedWith_Google Cloud,PlatformHaveWorkedWith_Heroku,PlatformHaveWorkedWith_Hetzner,PlatformHaveWorkedWith_IBM Cloud Or Watson,"PlatformHaveWorkedWith_Linode, now Akamai",PlatformHaveWorkedWith_Managed Hosting,PlatformHaveWorkedWith_Microsoft Azure,PlatformHaveWorkedWith_Netlify,PlatformHaveWorkedWith_OVH,PlatformHaveWorkedWith_OpenShift,PlatformHaveWorkedWith_OpenStack,PlatformHaveWorkedWith_Oracle Cloud Infrastructure (OCI),PlatformHaveWorkedWith_Render,PlatformHaveWorkedWith_Scaleway,PlatformHaveWorkedWith_VMware,PlatformHaveWorkedWith_Vercel,PlatformHaveWorkedWith_Vultr,WebframeHaveWorkedWith_,WebframeHaveWorkedWith_ASP.NET,WebframeHaveWorkedWith_ASP.NET CORE,WebframeHaveWorkedWith_Angular,WebframeHaveWorkedWith_AngularJS,WebframeHaveWorkedWith_Blazor,WebframeHaveWorkedWith_CodeIgniter,WebframeHaveWorkedWith_Deno,WebframeHaveWorkedWith_Django,WebframeHaveWorkedWith_Drupal,WebframeHaveWorkedWith_Elm,WebframeHaveWorkedWith_Express,WebframeHaveWorkedWith_FastAPI,WebframeHaveWorkedWith_Fastify,WebframeHaveWorkedWith_Flask,WebframeHaveWorkedWith_Gatsby,WebframeHaveWorkedWith_Laravel,WebframeHaveWorkedWith_Lit,WebframeHaveWorkedWith_NestJS,WebframeHaveWorkedWith_Next.js,WebframeHaveWorkedWith_Node.js,WebframeHaveWorkedWith_Nuxt.js,WebframeHaveWorkedWith_Phoenix,WebframeHaveWorkedWith_Play Framework,WebframeHaveWorkedWith_Qwik,WebframeHaveWorkedWith_React,WebframeHaveWorkedWith_Remix,WebframeHaveWorkedWith_Ruby on Rails,WebframeHaveWorkedWith_Solid.js,WebframeHaveWorkedWith_Spring Boot,WebframeHaveWorkedWith_Svelte,WebframeHaveWorkedWith_Symfony,WebframeHaveWorkedWith_Vue.js,WebframeHaveWorkedWith_WordPress,WebframeHaveWorkedWith_jQuery,MiscTechHaveWorkedWith_,MiscTechHaveWorkedWith_.NET (5+),MiscTechHaveWorkedWith_.NET Framework (1.0 - 4.8),MiscTechHaveWorkedWith_.NET MAUI,MiscTechHaveWorkedWith_Apache Kafka,MiscTechHaveWorkedWith_Apache Spark,MiscTechHaveWorkedWith_CUDA,MiscTechHaveWorkedWith_Capacitor,MiscTechHaveWorkedWith_Cordova,MiscTechHaveWorkedWith_Electron,MiscTechHaveWorkedWith_Flutter,MiscTechHaveWorkedWith_GTK,MiscTechHaveWorkedWith_Hadoop,MiscTechHaveWorkedWith_Hugging Face Transformers,MiscTechHaveWorkedWith_Ionic,MiscTechHaveWorkedWith_JAX,MiscTechHaveWorkedWith_Keras,MiscTechHaveWorkedWith_Ktor,MiscTechHaveWorkedWith_MFC,MiscTechHaveWorkedWith_Micronaut,MiscTechHaveWorkedWith_NumPy,MiscTechHaveWorkedWith_OpenGL,MiscTechHaveWorkedWith_Opencv,MiscTechHaveWorkedWith_Pandas,MiscTechHaveWorkedWith_Qt,MiscTechHaveWorkedWith_Quarkus,MiscTechHaveWorkedWith_RabbitMQ,MiscTechHaveWorkedWith_React Native,MiscTechHaveWorkedWith_Scikit-Learn,MiscTechHaveWorkedWith_Spring Framework,MiscTechHaveWorkedWith_SwiftUI,MiscTechHaveWorkedWith_Tauri,MiscTechHaveWorkedWith_TensorFlow,MiscTechHaveWorkedWith_Tidyverse,MiscTechHaveWorkedWith_Torch/PyTorch,MiscTechHaveWorkedWith_Uno Platform,MiscTechHaveWorkedWith_Xamarin,ToolsTechHaveWorkedWith_,ToolsTechHaveWorkedWith_APT,ToolsTechHaveWorkedWith_Ansible,ToolsTechHaveWorkedWith_Ant,ToolsTechHaveWorkedWith_Bun,ToolsTechHaveWorkedWith_CMake,ToolsTechHaveWorkedWith_Cargo,ToolsTechHaveWorkedWith_Catch2,ToolsTechHaveWorkedWith_Chef,ToolsTechHaveWorkedWith_Chocolatey,ToolsTechHaveWorkedWith_Composer,ToolsTechHaveWorkedWith_Dagger,ToolsTechHaveWorkedWith_Docker,ToolsTechHaveWorkedWith_GNU GCC,ToolsTechHaveWorkedWith_Godot,ToolsTechHaveWorkedWith_Google Test,ToolsTechHaveWorkedWith_Gradle,ToolsTechHaveWorkedWith_Homebrew,ToolsTechHaveWorkedWith_Kubernetes,ToolsTechHaveWorkedWith_LLVM's Clang,ToolsTechHaveWorkedWith_MSBuild,ToolsTechHaveWorkedWith_MSVC,ToolsTechHaveWorkedWith_Make,ToolsTechHaveWorkedWith_Maven (build tool),ToolsTechHaveWorkedWith_Meson,ToolsTechHaveWorkedWith_Ninja,ToolsTechHaveWorkedWith_Nix,ToolsTechHaveWorkedWith_NuGet,ToolsTechHaveWorkedWith_Pacman,ToolsTechHaveWorkedWith_Pip,ToolsTechHaveWorkedWith_Podman,ToolsTechHaveWorkedWith_Pulumi,ToolsTechHaveWorkedWith_Puppet,ToolsTechHaveWorkedWith_QMake,ToolsTechHaveWorkedWith_SCons,ToolsTechHaveWorkedWith_Terraform,ToolsTechHaveWorkedWith_Unity 3D,ToolsTechHaveWorkedWith_Unreal Engine,ToolsTechHaveWorkedWith_Visual Studio Solution,ToolsTechHaveWorkedWith_Vite,ToolsTechHaveWorkedWith_Wasmer,ToolsTechHaveWorkedWith_Webpack,ToolsTechHaveWorkedWith_Yarn,ToolsTechHaveWorkedWith_bandit,ToolsTechHaveWorkedWith_build2,ToolsTechHaveWorkedWith_cppunit,ToolsTechHaveWorkedWith_doctest,ToolsTechHaveWorkedWith_lest,ToolsTechHaveWorkedWith_npm,ToolsTechHaveWorkedWith_pnpm,NEWCollabToolsHaveWorkedWith_,NEWCollabToolsHaveWorkedWith_Android Studio,NEWCollabToolsHaveWorkedWith_Atom,NEWCollabToolsHaveWorkedWith_BBEdit,NEWCollabToolsHaveWorkedWith_CLion,NEWCollabToolsHaveWorkedWith_Code::Blocks,NEWCollabToolsHaveWorkedWith_DataGrip,NEWCollabToolsHaveWorkedWith_Eclipse,NEWCollabToolsHaveWorkedWith_Emacs,NEWCollabToolsHaveWorkedWith_Fleet,NEWCollabToolsHaveWorkedWith_Geany,NEWCollabToolsHaveWorkedWith_Goland,NEWCollabToolsHaveWorkedWith_Helix,NEWCollabToolsHaveWorkedWith_IPython,NEWCollabToolsHaveWorkedWith_IntelliJ IDEA,NEWCollabToolsHaveWorkedWith_Jupyter Notebook/JupyterLab,NEWCollabToolsHaveWorkedWith_Kate,NEWCollabToolsHaveWorkedWith_Micro,NEWCollabToolsHaveWorkedWith_Nano,NEWCollabToolsHaveWorkedWith_Neovim,NEWCollabToolsHaveWorkedWith_Netbeans,NEWCollabToolsHaveWorkedWith_Notepad++,NEWCollabToolsHaveWorkedWith_Nova,NEWCollabToolsHaveWorkedWith_PhpStorm,NEWCollabToolsHaveWorkedWith_PyCharm,NEWCollabToolsHaveWorkedWith_Qt Creator,NEWCollabToolsHaveWorkedWith_RStudio,"NEWCollabToolsHaveWorkedWith_Rad Studio (Delphi, C++ Builder)",NEWCollabToolsHaveWorkedWith_Rider,NEWCollabToolsHaveWorkedWith_RubyMine,NEWCollabToolsHaveWorkedWith_Spyder,NEWCollabToolsHaveWorkedWith_Sublime Text,NEWCollabToolsHaveWorkedWith_TextMate,NEWCollabToolsHaveWorkedWith_VSCodium,NEWCollabToolsHaveWorkedWith_Vim,NEWCollabToolsHaveWorkedWith_Visual Studio,NEWCollabToolsHaveWorkedWith_Visual Studio Code,NEWCollabToolsHaveWorkedWith_WebStorm,NEWCollabToolsHaveWorkedWith_Xcode,NEWCollabToolsHaveWorkedWith_condo,OfficeStackAsyncHaveWorkedWith_,OfficeStackAsyncHaveWorkedWith_Adobe Workfront,OfficeStackAsyncHaveWorkedWith_Airtable,OfficeStackAsyncHaveWorkedWith_Asana,OfficeStackAsyncHaveWorkedWith_Azure Devops,OfficeStackAsyncHaveWorkedWith_Basecamp,OfficeStackAsyncHaveWorkedWith_Clickup,OfficeStackAsyncHaveWorkedWith_Confluence,OfficeStackAsyncHaveWorkedWith_Doxygen,OfficeStackAsyncHaveWorkedWith_GitHub Discussions,OfficeStackAsyncHaveWorkedWith_Jira,OfficeStackAsyncHaveWorkedWith_Linear,OfficeStackAsyncHaveWorkedWith_Markdown File,OfficeStackAsyncHaveWorkedWith_Microsoft Lists,OfficeStackAsyncHaveWorkedWith_Microsoft Planner,OfficeStackAsyncHaveWorkedWith_Miro,OfficeStackAsyncHaveWorkedWith_Monday.com,OfficeStackAsyncHaveWorkedWith_Notion,OfficeStackAsyncHaveWorkedWith_Nuclino,OfficeStackAsyncHaveWorkedWith_Planview Projectplace Or Clarizen,OfficeStackAsyncHaveWorkedWith_Redmine,OfficeStackAsyncHaveWorkedWith_Redocly,OfficeStackAsyncHaveWorkedWith_Shortcut,OfficeStackAsyncHaveWorkedWith_Smartsheet,OfficeStackAsyncHaveWorkedWith_Stack Overflow for Teams,OfficeStackAsyncHaveWorkedWith_Swit,OfficeStackAsyncHaveWorkedWith_Trello,OfficeStackAsyncHaveWorkedWith_Wikis,OfficeStackAsyncHaveWorkedWith_Wrike,OfficeStackAsyncHaveWorkedWith_YouTrack,OfficeStackSyncHaveWorkedWith_,OfficeStackSyncHaveWorkedWith_Cisco Webex Teams,OfficeStackSyncHaveWorkedWith_Discord,OfficeStackSyncHaveWorkedWith_Google Chat,OfficeStackSyncHaveWorkedWith_Google Meet,OfficeStackSyncHaveWorkedWith_IRC,OfficeStackSyncHaveWorkedWith_Jitsi,OfficeStackSyncHaveWorkedWith_Matrix,OfficeStackSyncHaveWorkedWith_Mattermost,OfficeStackSyncHaveWorkedWith_Microsoft Teams,OfficeStackSyncHaveWorkedWith_Ringcentral,OfficeStackSyncHaveWorkedWith_Rocketchat,OfficeStackSyncHaveWorkedWith_Signal,OfficeStackSyncHaveWorkedWith_Skype,OfficeStackSyncHaveWorkedWith_Slack,OfficeStackSyncHaveWorkedWith_Symphony,OfficeStackSyncHaveWorkedWith_Telegram,OfficeStackSyncHaveWorkedWith_Unify Circuit,OfficeStackSyncHaveWorkedWith_Whatsapp,OfficeStackSyncHaveWorkedWith_Wickr,OfficeStackSyncHaveWorkedWith_Wire,OfficeStackSyncHaveWorkedWith_Zoom,OfficeStackSyncHaveWorkedWith_Zulip,AISearchHaveWorkedWith_,AISearchHaveWorkedWith_Andi,AISearchHaveWorkedWith_Bing AI,AISearchHaveWorkedWith_ChatGPT,AISearchHaveWorkedWith_Google Bard AI,AISearchHaveWorkedWith_Perplexity AI,AISearchHaveWorkedWith_Phind,AISearchHaveWorkedWith_Quora Poe,AISearchHaveWorkedWith_WolframAlpha,AISearchHaveWorkedWith_You.com,AIDevHaveWorkedWith_,AIDevHaveWorkedWith_AWS CodeWhisperer,AIDevHaveWorkedWith_Codeium,AIDevHaveWorkedWith_GitHub Copilot,AIDevHaveWorkedWith_Synk Code,AIDevHaveWorkedWith_Tabnine,AIDevHaveWorkedWith_Whispr AI,AIAcc_,AIAcc_Greater efficiency,AIAcc_Improve accuracy in coding,AIAcc_Improve collaboration,AIAcc_Increase productivity,AIAcc_Other (please explain),AIAcc_Speed up learning,AIBen_,AIBen_Highly distrust,AIBen_Highly trust,AIBen_Neither trust nor distrust,AIBen_Somewhat distrust,AIBen_Somewhat trust,AIToolCurrently Using_,AIToolCurrently Using_Collaborating with teammates,AIToolCurrently Using_Committing and reviewing code,AIToolCurrently Using_Debugging and getting help,AIToolCurrently Using_Deployment and monitoring,AIToolCurrently Using_Documenting code,AIToolCurrently Using_Learning about a codebase,AIToolCurrently Using_Other (please describe),AIToolCurrently Using_Project planning,AIToolCurrently Using_Testing code,AIToolCurrently Using_Writing code
0,5,5,0,10,0,35000.0,2,4,1.0,1.0,1.0,0,0,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,50361.000000,52973.549052,53507.215094,55708.823657,0.0,0.0,0.0,1.0,0.0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1
1,5,3,15,15,0,70000.0,1,3,1.0,1.0,1.0,1,2,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,52341.025641,54852.357875,53507.215094,55708.823657,0.0,1.0,0.0,0.0,0.0,0,0,1,0,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
2,5,4,5,3,0,25000.0,1,0,0.0,1.0,0.0,2,1,0.0,1.0,0.0,1,0,0,1,0,0.0,1.0,50908.278146,54453.590602,50500.000000,53536.351028,0.0,1.0,0.0,0.0,0.0,0,1,0,0,1,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
3,5,4,4,2,0,35000.0,2,3,1.0,1.0,1.0,-1,-1,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,58952.191011,54870.263257,39500.000000,52893.454509,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0.0,0.0,0.0,0.0,1.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,6,15,10,0,42000.0,2,4,1.0,1.0,1.0,-1,-1,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,67207.588235,53515.451768,42000.000000,96000.000000,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0.0,0.0,0.0,0.0,1.0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145,5,4,8,6,1,51000.0,2,4,1.0,1.0,1.0,1,0,1.0,0.0,0.0,1,0,0,1,0,0.0,1.0,51105.882353,59271.481885,50722.222222,53536.351028,0.0,0.0,0.0,0.0,1.0,0,0,1,0,0,0.0,0.0,0.0,0.0,1.0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1
1146,5,4,8,3,2,100000.0,2,5,1.0,1.0,1.0,-1,-1,0.0,1.0,0.0,1,0,0,1,0,0.0,1.0,59111.111111,57355.208510,100000.000000,109333.333333,0.0,0.0,0.0,1.0,0.0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1
1147,5,4,6,2,2,27000.0,2,5,1.0,1.0,1.0,1,1,0.0,1.0,0.0,1,0,0,1,0,0.0,1.0,52341.025641,54870.263257,37500.000000,53536.351028,0.0,0.0,0.0,1.0,0.0,0,1,0,0,1,0.0,0.0,0.0,0.0,1.0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1
1148,5,4,12,6,0,30000.0,0,0,0.0,0.0,0.0,1,2,1.0,0.0,0.0,1,0,0,1,0,1.0,0.0,50361.000000,57815.823382,67444.444444,55708.823657,0.0,0.0,0.0,0.0,1.0,0,0,0,0,1,0.0,0.0,0.0,0.0,1.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [209]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1150 entries, 0 to 1149
Columns: 466 entries, MainBranch to AIToolCurrently Using_Writing code
dtypes: float64(23), int32(436), int64(7)
memory usage: 2.2 MB


In [210]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1150 entries, 0 to 1149
Columns: 466 entries, MainBranch to AIToolCurrently Using_Writing code
dtypes: float64(23), int32(436), int64(7)
memory usage: 2.2 MB
