# Data Analysis
This notebook explores our data: we derive new columns from the existing tables and look for correlations between variables.

In [9]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

In [10]:
# Set up the database connection: read the connection URL from the
# environment and build the SQLAlchemy engine used by the cells below.

load_dotenv()  # pull variables from a .env file into the process environment
database_url = os.getenv('DATABASE_URL')

# Fail fast with an actionable message: os.getenv() returns None when the
# variable is missing, and create_engine() would otherwise reject it with a
# less specific error.
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set. Define it in the environment or in a .env "
        "file before running this notebook."
    )

engine = create_engine(database_url)

In [None]:
# Create (or re-create) the hourly_wage view.
#
# The view exposes employeeid plus salary_usd / 2080 for employees whose
# employment_type is 'FT'. 2080 is presumably 40 h/week x 52 weeks -- confirm.
# NOTE(review): if salary_usd is an integer column, some databases (e.g.
# PostgreSQL) perform integer division here and truncate the wage -- confirm
# the column type and divide by 2080.0 if fractional wages are wanted.

with engine.begin() as connection:
    # Dropping first keeps this cell re-runnable: a bare CREATE VIEW raises
    # once the view already exists, which breaks "Restart & Run All".
    connection.execute(text("DROP VIEW IF EXISTS hourly_wage"))
    connection.execute(text(
        "CREATE VIEW hourly_wage AS "
        "SELECT employeeid, (salary_usd / 2080) AS hourly_wage "
        "FROM employee "
        "WHERE employment_type = 'FT'"
    ))
    # engine.begin() commits when the block exits cleanly and rolls back if
    # an exception is raised, so no explicit commit() is needed.

In [39]:
# Remote work statistics

# Every statistic below runs the same aggregate over the employee/company
# join; only the WHERE clause differs, so the shared part lives in one place.
REMOTE_RATIO_BASE_QUERY = (
    'SELECT AVG(remote_ratio) '
    'FROM employee e JOIN company c ON e.companyid = c.companyid '
)


def average_remote_ratio(where_clause):
    """Return the average remote_ratio for employee/company rows matching a filter.

    Parameters
    ----------
    where_clause : str
        SQL boolean expression placed after ``WHERE``. It is concatenated
        into the statement verbatim, so only pass trusted, hard-coded
        expressions (never user input).

    Returns
    -------
    pandas.DataFrame
        The query result exactly as produced by ``pd.read_sql``.
    """
    return pd.read_sql(REMOTE_RATIO_BASE_QUERY + 'WHERE ' + where_clause, engine)


# (printed heading, WHERE clause) pairs, in the order they are reported.
remote_ratio_reports = [
    ("Average remote ratio for employees working at company in their country: \n",
     'e.countryid = c.countryid'),
    ("\nAverage remote ratio for employees NOT working at company in their country: \n",
     'e.countryid <> c.countryid'),
    ("\nAverage remote ratio for employees working at small company: \n",
     "c.size = 'S'"),
    ("\nAverage remote ratio for employees working at medium company: \n",
     "c.size = 'M'"),
    ("\nAverage remote ratio for employees working at large company: \n",
     "c.size = 'L'"),
]

for heading, where_clause in remote_ratio_reports:
    print(heading, average_remote_ratio(where_clause))

Average remote ratio for employees working at company in their country: 
          avg
0  31.571568

Average remote ratio for employees NOT working at company in their country: 
          avg
0  86.538462

Average remote ratio for employees working at small company: 
          avg
0  67.819149

Average remote ratio for employees working at medium company: 
          avg
0  31.392918

Average remote ratio for employees working at large company: 
          avg
0  34.519231


In [42]:
# Average salary per job title, ordered by the computed average (ascending).
salary_by_job_sql = (
    'SELECT job_title, AVG(salary_usd) AS average_salary '
    'FROM employee GROUP BY job_title ORDER BY average_salary'
)
salary_by_job = pd.read_sql(salary_by_job_sql, engine)

print("Average salary by job: \n", salary_by_job)

Average salary by job: 
                           job_title  average_salary
0          Principal Data Architect    38154.000000
1                  CRM Data Analyst    40000.000000
2           Compliance Data Analyst    45000.000000
3                   Insight Analyst    50090.642857
4     Quantitative Research Analyst    51000.000000
..                              ...             ...
150              AWS Data Architect   258000.000000
151  Managing Director Data Science   280000.000000
152        Head of Machine Learning   299758.428571
153          Data Science Tech Lead   375000.000000
154   Analytics Engineering Manager   399880.000000

[155 rows x 2 columns]
