### Import CSAT Data

In [107]:
import os
import urllib
import urllib.parse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from pyodbc import connect
from sqlalchemy import create_engine

In [108]:
# Default location of the .env file; change it (or pass env_path) to match your machine
DEFAULT_ENV_PATH = 'OneDrive/Documentos/SQL Server Management Studio/BusinessAnalysisProject/variables.env'


# Function to fetch data from our Database
def fetch_data(query, env_path=DEFAULT_ENV_PATH):
    """Run ``query`` against the configured SQL Server database.

    Connection settings are read from the environment variables
    ``SERVER_NAME``, ``DB``, ``USER`` and ``PASSWORD`` after loading the
    .env file at ``env_path``.

    Parameters
    ----------
    query : str
        SQL text handed to ``pandas.read_sql_query``.
    env_path : str, optional
        Path to the .env file holding the connection settings. Defaults to
        ``DEFAULT_ENV_PATH`` (the previously hard-coded location).

    Returns
    -------
    pandas.DataFrame
        The result set of ``query``.

    Raises
    ------
    RuntimeError
        If any of the required environment variables is not set.
    """
    # Load variables from the .env file.
    # NOTE(review): by default load_dotenv does not override variables that
    # already exist in the process environment, so an OS-defined USER
    # (common on Linux/macOS) would win over the value in the .env file.
    load_dotenv(env_path)

    # Access variables and fail early with a readable message instead of
    # silently building a URL containing the text 'None'.
    settings = {
        name: os.getenv(name)
        for name in ("SERVER_NAME", "DB", "USER", "PASSWORD")
    }
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            f"Missing database setting(s): {', '.join(missing)} "
            f"(looked in the environment and in '{env_path}')"
        )

    server = settings["SERVER_NAME"]
    database = settings["DB"]
    user = settings["USER"]
    # The password is percent-encoded so characters such as '@' or '/'
    # cannot break the URL structure.
    password = urllib.parse.quote_plus(settings["PASSWORD"])

    # Connect to Database
    conn_string = f'mssql+pyodbc://{user}:{password}@{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server'

    engine = create_engine(conn_string)
    try:
        return pd.read_sql_query(query, engine)
    finally:
        # A new engine is created on every call, so release its connection
        # pool once the query is done rather than leaking connections.
        engine.dispose()

In [109]:
# Select every row and column of the FactCSATSurveyData table
query = 'SELECT * FROM FactCSATSurveyData;'

# Store query result into a dataframe
df = fetch_data(query)

In [110]:
# Preview the first rows to sanity-check the load
df.head()

Unnamed: 0,CustomerID,SurveyYear,SurveyQuarter,SurveyDate,ResponseDate,RecommendationRate,CompanyRate,UserFrequency,QualityRate,PanelUsabilityRate,ReportCapability,SurveyID
0,CRM-1516050,2022,3,2022-07-01,2022-08-26,10.0,5.0,Once a Month,5.0,5.0,I need someone from Insider team to provide me...,1
1,CRM-1516050,2022,3,2022-07-01,2022-09-27,10.0,5.0,Once a Month,5.0,5.0,I don't use it often,2
2,CRM-1470950,2022,1,2022-01-01,2022-03-25,4.0,3.0,Once a Day,2.0,3.0,I need someone from Insider team to provide me...,3
3,CRM-1470950,2022,3,2022-07-01,2022-09-30,6.0,3.0,Once a Week,4.0,4.0,I tried but could not find everything I need,4
4,CRM-1470950,2022,2,2022-04-01,2022-06-23,10.0,5.0,Once a Day,4.0,3.0,I need someone from Insider team to provide me...,5


In [111]:
# Summarize column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1989 entries, 0 to 1988
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   CustomerID          1989 non-null   object 
 1   SurveyYear          1989 non-null   int64  
 2   SurveyQuarter       1989 non-null   int64  
 3   SurveyDate          1989 non-null   object 
 4   ResponseDate        1989 non-null   object 
 5   RecommendationRate  1989 non-null   float64
 6   CompanyRate         1989 non-null   float64
 7   UserFrequency       1989 non-null   object 
 8   QualityRate         1989 non-null   float64
 9   PanelUsabilityRate  1989 non-null   float64
 10  ReportCapability    1989 non-null   object 
 11  SurveyID            1989 non-null   int64  
dtypes: float64(4), int64(3), object(5)
memory usage: 186.6+ KB


### Sentiment Analysis

In [114]:
import nltk 
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [115]:
# Download the VADER lexicon needed by SentimentIntensityAnalyzer
# (nltk reports it as up-to-date when it is already installed)
nltk.download('vader_lexicon')

# Initialize analyzer once at module level; sentiment_scores reuses this instance
analyzer = SentimentIntensityAnalyzer()

# Calculate the compound sentiment score for a piece of text
def sentiment_scores(report):
    """Return the VADER ``compound`` score for ``report``.

    ``report`` is passed unchanged to the module-level ``analyzer``'s
    ``polarity_scores`` and only the ``'compound'`` entry of the resulting
    dict is returned.

    NOTE(review): presumably ``report`` is a single text string, since
    ``polarity_scores`` scores one text per call — confirm against the caller.
    """
    return analyzer.polarity_scores(report)['compound']

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\alvar\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
# Evaluate scores to place them into categories
def sentiment_categories(score):
    """Intended to map a sentiment ``score`` to one of the category labels.

    NOTE(review): in the visible portion of this cell only the label list is
    defined; ``score`` is not read and nothing is returned yet. The cell is
    also unexecuted, so the function looks unfinished.
    """
    # Create a category list, ordered from most positive to most negative
    # NOTE(review): label casing is inconsistent ('Mixed positive' vs
    # 'Mixed Negative') — confirm the intended spelling before using the
    # labels for grouping.
    categories = ['Positive','Mixed positive','Neutral','Mixed Negative','Negative']

    
    