# Hello! Here you can find an analysis of developer vacancies on Djinni.

### 1. Scrape vacancies for your skill from djinni.co and save the results to djinni.csv. Pass your skill through the `keyword` argument, e.g. `keyword=Python`

In [None]:
# Run the `djinni_jobs` spider from inside the djinni_scrape directory,
# passing the skill to search for as the `keyword` spider argument.
!cd djinni_scrape && scrapy crawl djinni_jobs -a keyword=Python

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import re

### 2. Check data in djinni.csv

In [None]:
# Load the scraped vacancies into a DataFrame.
# NOTE(review): skiprows=[0] discards the first line of the file, so pandas
# takes the *next* line as the header row -- confirm no vacancy is lost here.
csv_path = 'djinni_scrape/djinni.csv'
df = pd.read_csv(csv_path, skiprows=[0])

# Show the raw table as a quick sanity check
print(df)

### 3. Assign meaningful column names

In [None]:
# Overwrite the header taken from the CSV with readable names; the names are
# assigned positionally, left to right.
df.columns = [
    'Job title',
    'Experience',
    'Technologies',
    'Views',
    'Salary',
]

### 4. Create correlation matrix

In [None]:
# Coerce both columns to numbers; values that cannot be parsed become NaN
for numeric_column in ('Experience', 'Views'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')

# Keep only the two columns being compared
subset_df = df[['Experience', 'Views']]

# Pairwise correlation between 'Experience' and 'Views'
correlation_matrix = subset_df.corr()

print(correlation_matrix)

### 5. Calculate the maximum and minimum views for each number of years of experience

In [None]:
# Minimum and maximum view counts per experience level, flattened so that
# 'Experience' becomes an ordinary column again
experience_stats = (
    df.groupby('Experience')['Views']
    .agg(['min', 'max'])
    .reset_index()
)

years = experience_stats['Experience']
max_views = experience_stats['max']
min_views = experience_stats['min']

# The max bars are drawn first; the min bars are then drawn over them
plt.bar(years, max_views, label='Max Views')
plt.bar(years, min_views, label='Min Views')
plt.xlabel('Years of Experience')
plt.ylabel('Views')
plt.title('Max and Min Views by Years of Experience')
plt.legend()
plt.xticks(years)  # one tick per experience level present in the data

# Annotate every bar with its value
for year, highest, lowest in zip(years, max_views, min_views):
    plt.text(year, highest, str(highest), ha='center', va='bottom')
    plt.text(year, lowest, str(lowest), ha='center', va='bottom')

plt.show()

### 6. Check list of additional technologies for your skill

In [None]:
technologies = df['Technologies']

# Every distinct technology across all non-empty rows, trimmed of surrounding
# whitespace and lower-cased so that spelling variants collapse together
unique_technologies = {
    tech.strip().lower()
    for tech_list in technologies.dropna()
    for tech in tech_list.split(',')
}

# One technology per line (set iteration order is arbitrary)
for name in unique_technologies:
    print(name)

### 7. Check top 5 most popular technologies for your skill

In [None]:
technologies = df['Technologies']

# Flatten all comma-separated lists into a single Series of normalised names
technology_counts = pd.Series([
    tech.strip().lower()
    for tech_list in technologies.dropna()
    for tech in tech_list.split(',')
])

# The five most frequently mentioned technologies
top_technologies = technology_counts.value_counts().head(5)

labels = top_technologies.index
counts = top_technologies.values

# Pie chart of the top five, with each slice's share shown as a percentage
plt.pie(counts, labels=labels, autopct='%1.1f%%')
plt.title('Top 5 Most Popular Technologies')
plt.axis('equal')
plt.show()

### 8. Salary proposals

In [None]:
# Keep only rows that carry an actual salary: drop missing values, the
# literal text "None", and the placeholder with both bounds unset
salary_column = df['Salary']
has_value = salary_column.notnull()
not_none_text = salary_column != "None"
not_empty_range = salary_column != "{'Min': None, 'Max': None}"
filtered_df = df[has_value & not_none_text & not_empty_range]

# How often each distinct salary value occurs
salary_counts = filtered_df['Salary'].value_counts()

# Pie chart with one slice per distinct salary value
plt.figure(figsize=(8, 6))
plt.pie(salary_counts, labels=salary_counts.index, autopct='%1.1f%%')
plt.title('Salary Values')
plt.show()

### 9. Min and max salary proposals

In [None]:
# Patterns capturing the digits that follow the 'Min' / 'Max' keys
min_pattern = re.compile(r"'Min': (\d+)")
max_pattern = re.compile(r"'Max': (\d+)")


def _extract_bound(raw_salary, pattern):
    """Return the integer captured by *pattern* in *raw_salary*, else None."""
    if pd.isnull(raw_salary):
        return None
    found = pattern.search(raw_salary)
    return int(found.group(1)) if found else None


# Derive the two bound columns from the raw 'Salary' text
df['Min Salary'] = df['Salary'].apply(_extract_bound, args=(min_pattern,))
df['Max Salary'] = df['Salary'].apply(_extract_bound, args=(max_pattern,))

print(df[['Min Salary', 'Max Salary']])

### 10. Min and max salaries by years of experience

In [None]:
# Lowest 'Min Salary' and highest 'Max Salary' per experience level.
# The raw 'Salary' column holds text, so aggregating it directly (as this cell
# used to) compares strings lexicographically and hands strings to plt.bar.
# The numeric 'Min Salary' / 'Max Salary' columns extracted in step 9 are used
# instead; to_numeric also guards against an all-None (object dtype) column.
salary_bounds = pd.DataFrame({
    'Experience': df['Experience'],
    'min': pd.to_numeric(df['Min Salary'], errors='coerce'),
    'max': pd.to_numeric(df['Max Salary'], errors='coerce'),
})
experience_stats = (
    salary_bounds.groupby('Experience')
    .agg({'min': 'min', 'max': 'max'})  # NaN bounds are skipped per group
    .dropna(how='all')  # drop experience levels with no salary data at all
    .reset_index()
)

experience_levels = experience_stats['Experience']
highest_salaries = experience_stats['max']
lowest_salaries = experience_stats['min']

# Create the bar plot; the min bars are drawn over the max bars
plt.figure(figsize=(8, 6))
plt.bar(experience_levels, highest_salaries, label='Max Salary')
plt.bar(experience_levels, lowest_salaries, label='Min Salary')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.title('Max and Min Salary by Years of Experience')
plt.legend()
plt.xticks(experience_levels)  # one tick per experience level in the data

# Add text labels with min and max salaries. A level may have only one of the
# two bounds, so NaN values are skipped rather than placed at a NaN position.
# The extracted bounds are whole numbers, hence the '.0f' format.
for level, highest, lowest in zip(experience_levels, highest_salaries, lowest_salaries):
    if pd.notna(highest):
        plt.text(level, highest, f'{highest:.0f}', ha='center', va='bottom')
    if pd.notna(lowest):
        plt.text(level, lowest, f'{lowest:.0f}', ha='center', va='bottom')

# Show the bar plot
plt.show()