In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from percent import percentage_of
from formatNum import format_float_num
from constValues import survey_numeric, survey_codebook, survey_values

"""
.value_counts()
Returns object containing counts of unique values.
The resulting object will be in descending order so that the first element is 
the most frequently-occurring element. Excludes NA values by default.

.loc[]
Purely label-location based indexer for selection by label.

.astype()
Cast a pandas object to a specified type 

respondents_RF - number of respondents from Russia
"""
# Read the three survey CSV files (paths come from constValues), in the order
# numeric -> codebook -> values.
numeric_file, code_file, values_file = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (survey_numeric, survey_codebook, survey_values)
)

# Tally rows per country code, then keep the tally for Russia (code 138).
country_counts = numeric_file['CountryNumeric2'].value_counts()
respondents_RF = country_counts.loc[138].astype(int)

# 138 RF country code
# q8Student = 1  => student
"""
students_RF - number of respondents from Russia who are students
stud_RF_percent - percentage of respondents from Russia who are students
"""
# Boolean mask of the rows that are both from Russia (country code 138) and
# flagged as students (q8Student == 1).
is_RF_student = ((numeric_file['CountryNumeric2'] == 138) &
                 (numeric_file['q8Student'] == 1))

# Summing the mask counts the matching rows directly. Unlike looking up key 1
# in value_counts() of the already-filtered column, this gives 0 instead of
# raising KeyError when no row matches.
students_RF = is_RF_student.sum().astype(int)

# Share of students among all respondents from Russia.
stud_RF_percent = format_float_num(percentage_of(students_RF, respondents_RF))

"""
.sum()
Return the sum of the values for the requested axis

respondents_roles - total number of roles 
web_dev_roles - number of web developers 
data_science_roles - number of data science developers 
mobile_dev_roles - number of mobile developers 
web_dev_percent - web developers as a percentage of respondents_roles
data_science_percent - data science developers as a percentage of respondents_roles
mobile_dev_percent - mobile developers as a percentage of respondents_roles
"""
# Tally the q9CurrentRole answers once and reuse the result for every lookup
# below, instead of recomputing value_counts() on the same column each time.
role_counts = numeric_file['q9CurrentRole'].value_counts()

# value_counts() skips NA values, so this is the number of rows that have a
# role recorded.
respondents_roles = role_counts.sum()

# NOTE(review): the role codes are looked up as strings ('1', '6', '7'),
# whereas the country code elsewhere in this script is the integer 138.
# Presumably q9CurrentRole is read as text; confirm against the CSV.
web_dev_roles = role_counts.loc['1'].astype(int)
data_science_roles = role_counts.loc['6'].astype(int)
mobile_dev_roles = role_counts.loc['7'].astype(int)

# Each role's share of the rows that have a role recorded.
web_dev_percent = format_float_num(percentage_of(web_dev_roles, respondents_roles))
data_science_percent = format_float_num(percentage_of(data_science_roles, respondents_roles))
mobile_dev_percent = format_float_num(percentage_of(mobile_dev_roles, respondents_roles))

"""
.isin(values)
Return boolean DataFrame showing whether each element in the DataFrame is contained in values.

true_answers_RF - number of respondents from Russia who answered all questions correctly 
answer_true_percent - percentage of respondents from Russia who answered all questions
correctly of the total number of Russian respondents  
"""
# Rows of the values table whose country is the Russian Federation.
rf_rows = values_file.loc[values_file['CountryNumeric2'] == 'Russian Federation']

# Count how many of those rows carry an index label that also occurs in the
# codebook's index.
# NOTE(review): this count is what gets reported as "answered correctly";
# confirm that index overlap with the codebook is the intended criterion.
true_answers_RF = rf_rows.index.isin(code_file.index).sum()

# Expressed relative to the number of respondents from Russia.
answer_true_percent = format_float_num(percentage_of(true_answers_RF, respondents_RF))


def draw_RF_stud():
    """Show a donut chart of the student share among respondents from Russia.

    Reads the module-level ``stud_RF_percent`` and ``respondents_RF``; the
    respondent count is shown in the figure title.
    """
    labels = ('students', 'other')
    shares = [stud_RF_percent, 100 - stud_RF_percent]
    heading = 'Всего из РФ: ' + str(respondents_RF)

    plt.title(heading)
    plt.pie(
        shares,
        labels=labels,
        wedgeprops={'linewidth': 7, 'edgecolor': 'white'},
        autopct='%1.1f%%',
        startangle=90,
    )

    # A white disc drawn over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.7, color='white')
    plt.gca().add_artist(hole)
    plt.show()


def draw_percent_of_roles():
    """Show a donut chart of respondent roles: web, data science, mobile, other.

    Reads the module-level ``web_dev_percent``, ``data_science_percent`` and
    ``mobile_dev_percent``; the "other" slice is whatever remains up to 100.
    """
    labels = ('web', 'data', 'mob', 'other')
    remainder = 100 - (web_dev_percent + data_science_percent + mobile_dev_percent)
    shares = [web_dev_percent, data_science_percent, mobile_dev_percent, remainder]

    plt.title('Чем занимаются опрошенные:')
    plt.pie(
        shares,
        labels=labels,
        wedgeprops={'linewidth': 7, 'edgecolor': 'white'},
        autopct='%1.1f%%',
        startangle=120,
    )

    # A white disc drawn over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.7, color='white')
    plt.gca().add_artist(hole)
    plt.show()


def draw_true_answer():
    """Show a donut chart of ``answer_true_percent`` against the remainder.

    Reads the module-level ``answer_true_percent``; the second slice is
    ``100 - answer_true_percent``.
    """
    labels = ('Answ true', 'other')
    shares = [answer_true_percent, 100 - answer_true_percent]

    plt.title('Ответили правильно: ')
    plt.pie(
        shares,
        labels=labels,
        wedgeprops={'linewidth': 7, 'edgecolor': 'white'},
        autopct='%1.1f%%',
        startangle=90,
    )

    # A white disc drawn over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.7, color='white')
    plt.gca().add_artist(hole)
    plt.show()


# Text summary: each caption is printed followed by its computed figure.
summary_rows = (
    ('Число опрошенных программистов из России:', respondents_RF),
    ('Процент от программистов из России - студенты: ', stud_RF_percent),
    ('Процент от общего числа опрошенных занимается Web разработкой',
     web_dev_percent),
    ('Процент от общего числа опрошенных занимается Data Science',
     data_science_percent),
    ('Процент от общего числа опрошенных занимается мобильной разработкой',
     mobile_dev_percent),
    ('Какой процент программистов из России правильно ответили на каждый '
     'вопрос: ', answer_true_percent),
)
for caption, figure in summary_rows:
    print(caption, figure)

# Charts, in the same order as before: roles, students, correct answers.
for render_chart in (draw_percent_of_roles, draw_RF_stud, draw_true_answer):
    render_chart()


