# Hypothesis: The amount of incoming first-year students that have no experience will be more than 75%.
 

In [170]:
%reload_ext autoreload
%autoreload 2

## First I will read in the csv and convert it to a column oriented data table.

In [171]:
from data_utils import read_csv_rows, columnar

data_rows: list[dict[str, str]] = read_csv_rows('../../data/survey.csv')
columns : dict[str, list[str]] = columnar(data_rows)

## Next I will select the experience column and show the first ten rows: 

In [172]:
from data_utils import head, select
category: dict[str, list[str]] = select(columns, ["prior_exp"])
head(category, 10)

{'prior_exp': ['1-2 years',
  '2-6 months',
  '2-6 months',
  '2-6 months',
  '7-12 months',
  '1-2 years',
  '7-12 months',
  'None to less than one month!',
  '7-12 months',
  '2-6 months']}

## Now I will select the status column and show the first ten rows: 

In [173]:
from data_utils import head, select
cat_two: dict[str, list[str]] = select(columns, ["unc_status"])
head(cat_two, 10)

{'unc_status': ['Returning UNC Student',
  'Returning UNC Student',
  'Returning UNC Student',
  'Incoming Transfer Student',
  'Incoming First-year Student',
  'Returning UNC Student',
  'Returning UNC Student',
  'Returning UNC Student',
  'Returning UNC Student',
  'Returning UNC Student']}

## Now I will show the totals for each category in the experience columns and the status columns.

In [174]:
from data_utils import count
totals = count(category['prior_exp'])
stats = count(cat_two['unc_status'])

print(f"Total responses: {totals} ")
print(f"Status responses: {stats} ")

Total responses: {'1-2 years': 16, '2-6 months': 119, '7-12 months': 25, 'None to less than one month!': 332, 'Over 2 years': 10} 
Status responses: {'Returning UNC Student': 295, 'Incoming Transfer Student': 24, 'Incoming First-year Student': 178, 'part-time transfer student': 1, 'Transfered in fall of 2020': 1, 'Returning UNC student/Transfer student': 1, 'First-year': 1, 'Current Freshman': 1} 


## Now I will produce list of bool on whether or not the student has past experience.

In [175]:
def lots_exp(col: list[str], threshold: str) -> list[bool]:
    result: list[bool] = []
    for item in col:
        if item == threshold:
            result.append(False)
        else:
            result.append(True)
    return result

exp_mask: list[bool] = lots_exp(category["prior_exp"], "None to less than one month!")
print(exp_mask)

[True, True, True, True, True, True, True, False, True, True, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, True, True, False, False, False, True, False, True, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, True, False, True, False, False, False, False, True, True, True, False, False, False, False, False, False, False, True, False, False, False, False, True, False, False, False, True, True, False, True, True, False, False, True, True, False, False, False, False, False, False, True, False, False, True, True, False, False, True, False, True, False, True, True, False, True, False, True, True, False, True, False, False, True, True, True, True, False, True, False, False, True, True, F

## Now I will create the mask that only shows the students that have had no experience.

In [176]:
def masked(col: list[str], mask: list[bool]) -> list[str]:
    result: list[str] = []
    for i in range(len(mask)):
        if mask[i]:
            result.append(col[i])
    return result

last_mask: list[str] = masked(columns["unc_status"], exp_mask)
print(last_mask)

['Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Incoming Transfer Student', 'Incoming First-year Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'part-time transfer student', 'Returning UNC Student', 'Incoming First-year Student', 'Incoming First-year Student', 'Incoming First-year Student', 'Incoming First-year Student', 'Returning UNC Student', 'Incoming First-year Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student', 'Returning UNC Student',

## Now I will count the number of incoming first years students with no experience and compare it to the amount of first year students in general

In [177]:
second_counts = count(last_mask)  

print(f"Number of students who are incoming first year students and have no experience: {second_counts['Incoming First-year Student']} ")
print(f"Total number of Incoming first years: {stats['Incoming First-year Student']} ")

Number of students who are incoming first year students and have no experience: 68 
Total number of Incoming first years: 178 


# In Conclusion: I was really far off because 68/178 is about 38 percent. 

I thought this was a really interesting topic to look into because I thought that more incoming first-year students would have no experience. 