### CREATING DATAFRAME USING FAKER
CREATING A DUMMY DATASET USING THE FAKER PACKAGE

In [None]:
#IMPORTING LIBRARIES TO USE
import pandas as pd
from faker import Faker
import numpy as np
# Instantiate the Faker generator and seed every random source used below.
fake = Faker()
# Seed Faker so the generated names and departments repeat across runs.
Faker.seed(27)
# Age, Salary and yearsWithCompany are drawn from NumPy's global generator,
# which Faker.seed() does not touch; seed it too so the whole dataset is
# reproducible.
np.random.seed(27)

In [None]:
# Build a synthetic employee data frame with one row per employee.
num_empl = 500
departments = ['Engineering', 'Finance', 'HR', 'Marketing', 'Sales', 'IT']
# Tenure in whole years; randint's upper bound is exclusive, so values are 1-9.
yrs_with_comp = np.random.randint(1, 10, size = num_empl)
# Draw one noise value per employee. A bare np.random.randn() returns a single
# scalar, which would make every salary an exact linear function of tenure.
salary = 40000 + 2000 * yrs_with_comp * np.random.randn(num_empl)
employee_data = {'EmployeeID': np.arange(1, num_empl + 1),
                'FirstName': [fake.first_name() for _ in range(num_empl)],
                'LastName': [fake.last_name() for _ in range(num_empl)],
                # Upper bound is exclusive, so ages fall in 22-59.
                'Age': np.random.randint(22, 60, size = num_empl),
                'Department': [fake.random_element(departments) for _ in range(num_empl)],
                'Salary': np.round(salary),
                'yearsWithCompany': yrs_with_comp
                }
df_employees = pd.DataFrame(employee_data)
# Display the first 10 rows
df_employees.head(10)

In [None]:
# Print a concise summary of df_employees: column names, non-null counts,
# dtypes and memory usage.
df_employees.info()

In [None]:
# Display the dimensions of the data frame as a (rows, columns) tuple
df_employees.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the data frame;
# by default describe() reports only the numeric columns.
df_employees.describe()

In [None]:
# Pairwise correlation matrix of Age, Salary and yearsWithCompany
df_employees.loc[:, ['Age', 'Salary', 'yearsWithCompany']].corr()

In [None]:
# Pivot table: total salary for each (years with company, department) pair.
# Rows are tenure values, columns are departments.
pd.pivot_table(
    df_employees,
    index = 'yearsWithCompany',
    columns = 'Department',
    values = 'Salary',
    aggfunc = 'sum',
)

### SCATTER PLOT

In [None]:
# Scatter plot of Salary (y axis) against Age (x axis), one point per employee
df_employees.plot(
    kind = 'scatter',
    x = 'Age',
    y = 'Salary',
    title = 'SCATTER PLOT: Age vs Salary',
    xlabel = 'Age',
    ylabel = 'Salary',
    grid = True,
)

### LINE PLOT

In [None]:
# LINE PLOT: Average Salary Trend Over Years of Experience
# Mean salary per tenure value; groupby returns the result sorted by tenure.
avr_sal_by_exp = df_employees.groupby('yearsWithCompany')['Salary'].mean()
# Attach each employee's group average to their row (column kept on the frame).
df_employees['AverageSalaryByExperience'] = df_employees['yearsWithCompany'].map(avr_sal_by_exp)

# Plot the aggregated series (one point per tenure value) rather than the
# per-row column: drawing a line through all rows in their unsorted order
# produces a zigzag instead of a trend.
avr_sal_by_exp.plot.line(marker ='o', linestyle = '-',
                         title = 'Average Salary Trend Over Years of Experience',
                         xlabel = 'Years With Company', ylabel = 'Average Salary',
                         legend = False, grid = True)

### HISTOGRAM

In [None]:
# Histogram of employee ages, split into 15 bins
df_employees['Age'].plot(kind = 'hist', bins = 15, title = 'Age Distribution', xlabel = 'Age')

### BOX PLOT

In [None]:
# Horizontal box plot of Salary, with one box per Department
df_employees.boxplot(
    column = 'Salary',
    by = 'Department',
    vert = False,
    grid = True,
)

### BAR PLOT

In [None]:
# Bar chart of the number of employees in each department
df_employees['Department'].value_counts().plot(kind = 'bar', title = 'Employee count by Department')

### AREA PLOT

In [None]:
# Area Plot: Cumulative Salary Distribution Over Age Groups
# right = False makes the bins left-closed ([20, 30), [30, 40), ...) so they
# match the labels; with the default right-closed bins an age of 30 would be
# labelled '20-29'.
df_employees['AgeGroup'] = pd.cut(df_employees['Age'], bins = [20, 30, 40, 50, 60], right = False,
                                  labels = ['20-29', '30-39', '40-49', '50-59'])
# Per-row running total of salary within each age group, in row order.
cumulative_salary_by_age_group = df_employees.groupby('AgeGroup', observed = False)['Salary'].cumsum()
df_employees['CumulativeSalaryByAgeGroup'] = cumulative_salary_by_age_group

# Plot one value per age group: total salary of the group, accumulated across
# groups from youngest to oldest. Plotting the per-row column against a
# categorical x would draw every row in arbitrary order instead.
total_salary_by_age_group = df_employees.groupby('AgeGroup', observed = False)['Salary'].sum()
total_salary_by_age_group.cumsum().plot.area(title = 'Cumulative Salary Distribution Over Age Groups',
                                             xlabel = 'Age Group', ylabel = 'Cumulative Salary',
                                             legend = False, grid = True)

### PIE CHART

In [None]:
# Pie chart of each department's share of the total salary,
# with percentages shown to one decimal place
df_employees.groupby('Department')['Salary'].sum().plot(
    kind = 'pie',
    title = 'Department-wise Salary Distribution',
    autopct = '%1.1f%%',
)