#### Loading students survey data

In [38]:
import pandas as pd
# Read the raw students' survey responses (one row per respondent) and
# preview the first rows to confirm the file parsed as expected.
students_data = pd.read_csv("students_data.csv")
students_data.head()

Unnamed: 0,I enjoy learning mathematics,I feel confident when solving math problems,Mathematics is useful in real life,I get anxious during mathematics lessons,I believe I can perform well in mathematics,There are enough mathematics textbooks for all students,"The school provides mathematical instruments (rulers, compasses, sets)",Teachers use relevant teaching aids like charts and models,Students have access to practice tests and past papers for mathematics,Learning materials are updated to reflect the current syllabus,NAME OF THE SCHOOL.,SUB-COUNTY,COUNTY,GENDER
0,Agree,Agree,Strongly agree,Disagree,Agree,Disagree,Strongly Disagree,Disagree,Disagree,Agree,Kaudha_Mixed,Wagai,Siaya,Male
1,Strongly Agree,Disagree,Neutral,Neutral,Agree,Strongly Disagree,Agree,Disagree,Agree,Neutral,Kaudha_Mixed,Wagai,Siaya,Female
2,Strongly Agree,Agree,Strongly agree,Agree,Agree,Agree,Strongly Disagree,Disagree,Strongly Agree,Agree,Kaudha_Mixed,Wagai,Siaya,Male
3,Strongly Agree,Strongly agree,Strongly agree,Strongly Agree,Strongly agree,Agree,Neutral,Strongly Disagree,Strongly Disagree,Agree,Kaudha_Mixed,Wagai,Siaya,Male
4,Neutral,Neutral,Strongly disagree,Neutral,Strongly disagree,Neutral,Disagree,Neutral,Neutral,Strongly disagree,Kaudha_Mixed,Wagai,Siaya,Female


### Loading teachers data

In [39]:
# Read the raw teachers' survey responses and preview the first rows.
teachers_data = pd.read_csv("teachers_data.csv")
teachers_data.head()

Unnamed: 0,Mathematics teachers in this school attend training workshops regularly.,The training has helped teachers improve how they teach mathematics.,The training programs cover the current curriculum and exams.,Teachers share what they learn from training with others,The school administration supports teachers to attend training programs,NAME OF THE SCHOOL,SUB-COUNTY,COUNTY,Score
0,Agree,Agree,Strongly agree,Agree,Agree,Kaudha_Mixed,Wagai,Siaya,
1,Strongly agree,Strongly Agree,,Strongly agree,Agree,Kaudha_Mixed,Wagai,Siaya,
2,Agree,Agree,Agree,Agree,Agree,Budokomi_Mixed,Busia,Busia,
3,Agree,Neutral,Neutral,Agree,Agree,Budokomi_Mixed,Busia,Busia,
4,Agree,Strongly Agree,Agree,Strongly agree,Agree,Malele_Mixed,Wagai,Siaya,


### Loading KCSE data

In [40]:
# Read the KCSE results (one row per candidate) and preview the first rows.
kcse = pd.read_csv("kcse_data1.csv")
kcse.head()

Unnamed: 0,SCHOOL,GENDER,EXAM_YEAR,SUB-COUNTY,COUNTY,GRADE
0,Malele_Mixed,MALE,2024,Wagai,siaya,B+
1,Malele_Mixed,MALE,2024,Wagai,siaya,A
2,Malele_Mixed,MALE,2024,Wagai,siaya,C-
3,Malele_Mixed,MALE,2024,Wagai,siaya,B-
4,Malele_Mixed,FEMALE,2024,Wagai,siaya,D+


### kcse grades to points conversion

In [41]:
# KCSE letter grades mapped to points: A = 12 down to E = 1.
grades_map = {
    'A': 12, 'A-': 11,
    'B+': 10, 'B': 9, 'B-': 8,
    'C+': 7, 'C': 6, 'C-': 5,
    'D+': 4, 'D': 3, 'D-': 2,
    'E': 1,
}

# Normalise the grade text before the lookup so that values differing only by
# case or surrounding whitespace (e.g. " b+") still match a key in grades_map.
# Missing grades stay NaN.
grade_clean = kcse['GRADE'].str.strip().str.upper()
kcse['GRADE_POINTS'] = grade_clean.map(grades_map)

# Series.map yields NaN for any value that is not a key of the dict, so a
# grade that is present but unrecognised would otherwise vanish silently.
# Report such values so they can be corrected or added to grades_map.
unmapped_grades = kcse.loc[
    kcse['GRADE'].notna() & kcse['GRADE_POINTS'].isna(), 'GRADE'
].unique()
if len(unmapped_grades):
    print("Grades with no entry in grades_map:", list(unmapped_grades))

kcse.head()


Unnamed: 0,SCHOOL,GENDER,EXAM_YEAR,SUB-COUNTY,COUNTY,GRADE,GRADE_POINTS
0,Malele_Mixed,MALE,2024,Wagai,siaya,B+,10.0
1,Malele_Mixed,MALE,2024,Wagai,siaya,A,12.0
2,Malele_Mixed,MALE,2024,Wagai,siaya,C-,5.0
3,Malele_Mixed,MALE,2024,Wagai,siaya,B-,8.0
4,Malele_Mixed,FEMALE,2024,Wagai,siaya,D+,4.0


#### converting year column to string


In [42]:
# Store the exam year as text instead of a number, then print the frame's
# schema and non-null counts.
exam_year_text = kcse['EXAM_YEAR'].astype(str)
kcse['EXAM_YEAR'] = exam_year_text
kcse.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   SCHOOL        1700 non-null   object 
 1   GENDER        1700 non-null   object 
 2   EXAM_YEAR     1700 non-null   object 
 3   SUB-COUNTY    1700 non-null   object 
 4   COUNTY        1700 non-null   object 
 5   GRADE         1699 non-null   object 
 6   GRADE_POINTS  1698 non-null   float64
dtypes: float64(1), object(6)
memory usage: 93.1+ KB


### cleaning students_survey data

#### clean column names


In [43]:
# Normalise the header row: trim surrounding whitespace and Title-Case every
# column name, so the names match the Title-Cased entries listed in
# `attitude_cols` / `materials_cols` and the "Name Of The School." lookup
# further down (e.g. "I enjoy learning mathematics" ->
# "I Enjoy Learning Mathematics").
# The result must be assigned to `.columns`: assigning it to `students_data`
# itself would replace the whole DataFrame with an Index of names.
students_data.columns = students_data.columns.str.strip().str.title()

##### define likert scale mapping 

In [37]:
# Likert labels listed from least to most agreement; each label is scored by
# its 1-based position, giving "Strongly Disagree" = 1 ... "Strongly Agree" = 5.
likert_map = {
    label: score
    for score, label in enumerate(
        ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"],
        start=1,
    )
}

#### List attitude statement columns

In [92]:
# Survey statements that together make up the "Attitude" score.
# Names are Title-Cased and must match the DataFrame's column names exactly.
attitude_cols = [
    "I Enjoy Learning Mathematics",
    "I Feel Confident When Solving Math Problems",
    "Mathematics Is Useful In Real Life",
    "I Get Anxious During Mathematics Lessons",  # reverse-scored in a later cell
    "I Believe I Can Perform Well In Mathematics",
]

### list availability of materials columns

In [93]:
# Survey statements that together make up the
# "Availability of learning materials" score.
# Names are Title-Cased and must match the DataFrame's column names exactly.
materials_cols = [
    "There Are Enough Mathematics Textbooks For All Students",
    "The School Provides Mathematical Instruments (Rulers, Compasses, Sets)",
    "Teachers Use Relevant Teaching Aids Like Charts And Models",
    "Students Have Access To Practice Tests And Past Papers For Mathematics",
    "Learning Materials Are Updated To Reflect The Current Syllabus",
]

#### convert likert choices to numeric using likert scale mapping defined above

In [95]:
# Convert every Likert item from text to its numeric score.
# Responses are first trimmed and Title-Cased so that variants such as
# "Strongly agree" match the "Strongly Agree" key in likert_map.
# Any response that is still not a key of likert_map becomes NaN.
likert_cols = attitude_cols + materials_cols
for col in likert_cols:
    responses = students_data[col].str.strip().str.title()
    students_data[col] = responses.map(likert_map)

#### Reverse the anxiety column

In [96]:
# Reverse-score the anxiety item: subtracting from 6 flips the 1-5 scale
# (1 <-> 5, 2 <-> 4, 3 unchanged).
# NOTE: this cell is not idempotent - running it a second time flips the
# scores back to their original direction.
anxiety_item = "I Get Anxious During Mathematics Lessons"
students_data[anxiety_item] = students_data[anxiety_item].rsub(6)

#### Compute row-wise average for attitude

In [97]:
# Per-respondent attitude score: the mean of the attitude items taken across
# each row. Missing item scores are skipped by DataFrame.mean's default.
attitude_items = students_data[attitude_cols]
students_data["Attitude"] = attitude_items.mean(axis="columns")

#### Compute Row-wise Average for materials

In [101]:
# Per-respondent materials score: the mean of the materials items taken across
# each row. Missing item scores are skipped by DataFrame.mean's default.
materials_items = students_data[materials_cols]
students_data["Availability of learning materials"] = materials_items.mean(axis="columns")

### Check a few rows to ascertain the result

In [102]:
# Preview the first rows to check the numeric item scores and the two new
# composite columns ("Attitude", "Availability of learning materials").
students_data.head()

Unnamed: 0,I Enjoy Learning Mathematics,I Feel Confident When Solving Math Problems,Mathematics Is Useful In Real Life,I Get Anxious During Mathematics Lessons,I Believe I Can Perform Well In Mathematics,There Are Enough Mathematics Textbooks For All Students,"The School Provides Mathematical Instruments (Rulers, Compasses, Sets)",Teachers Use Relevant Teaching Aids Like Charts And Models,Students Have Access To Practice Tests And Past Papers For Mathematics,Learning Materials Are Updated To Reflect The Current Syllabus,Name Of The School.,Sub-County,County,Gender,Attitude,Availability of learning materials
0,4.0,4,5.0,4.0,4.0,2.0,1.0,2,2.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.2
1,5.0,2,3.0,3.0,4.0,1.0,4.0,2,4.0,3.0,Kaudha_Mixed,Wagai,Siaya,Female,3.4,2.8
2,5.0,4,5.0,2.0,4.0,4.0,1.0,2,5.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.0,3.2
3,5.0,5,5.0,1.0,5.0,4.0,3.0,1,1.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.6
4,3.0,3,1.0,3.0,1.0,3.0,2.0,3,3.0,1.0,Kaudha_Mixed,Wagai,Siaya,Female,2.2,2.4


##### confirm the data-types of the columns

In [103]:
# Print each column's dtype and non-null count; a non-null count below the
# row count marks a column that contains missing values.
students_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 604 entries, 0 to 603
Data columns (total 16 columns):
 #   Column                                                                  Non-Null Count  Dtype  
---  ------                                                                  --------------  -----  
 0   I Enjoy Learning Mathematics                                            600 non-null    float64
 1   I Feel Confident When Solving Math Problems                             604 non-null    int64  
 2   Mathematics Is Useful In Real Life                                      603 non-null    float64
 3   I Get Anxious During Mathematics Lessons                                602 non-null    float64
 4   I Believe I Can Perform Well In Mathematics                             600 non-null    float64
 5   There Are Enough Mathematics Textbooks For All Students                 602 non-null    float64
 6   The School Provides Mathematical Instruments (Rulers, Compasses, Sets)  603 non-nu

#### Save the new dataframe in csv format

In [104]:
# Save the scored survey (numeric item scores plus the two composite columns)
# to CSV; index=False leaves the row index out of the file.
students_data.to_csv("new_students_data.csv", index=False)

In [105]:
# Preview the first rows again before trimming columns; no cell between the
# previous preview and this one modifies the frame.
students_data.head()

Unnamed: 0,I Enjoy Learning Mathematics,I Feel Confident When Solving Math Problems,Mathematics Is Useful In Real Life,I Get Anxious During Mathematics Lessons,I Believe I Can Perform Well In Mathematics,There Are Enough Mathematics Textbooks For All Students,"The School Provides Mathematical Instruments (Rulers, Compasses, Sets)",Teachers Use Relevant Teaching Aids Like Charts And Models,Students Have Access To Practice Tests And Past Papers For Mathematics,Learning Materials Are Updated To Reflect The Current Syllabus,Name Of The School.,Sub-County,County,Gender,Attitude,Availability of learning materials
0,4.0,4,5.0,4.0,4.0,2.0,1.0,2,2.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.2
1,5.0,2,3.0,3.0,4.0,1.0,4.0,2,4.0,3.0,Kaudha_Mixed,Wagai,Siaya,Female,3.4,2.8
2,5.0,4,5.0,2.0,4.0,4.0,1.0,2,5.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.0,3.2
3,5.0,5,5.0,1.0,5.0,4.0,3.0,1,1.0,4.0,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.6
4,3.0,3,1.0,3.0,1.0,3.0,2.0,3,3.0,1.0,Kaudha_Mixed,Wagai,Siaya,Female,2.2,2.4


In [107]:
# Look up the zero-based position of the school-name column and print it.
school_index = students_data.columns.get_loc("Name Of The School.")
print(school_index)

10


##### dropping columns before school name to get students_data3

In [108]:
# Keep the school-name column and every column to its right (identifiers plus
# the composite scores), dropping the individual survey items before it.
# Slicing by label rather than by the hard-coded position 10 stays correct if
# columns are later added, removed or reordered. .copy() makes the result an
# independent frame rather than a slice of students_data.
students_data3 = students_data.loc[:, "Name Of The School.":].copy()

In [109]:
# Preview the trimmed frame to confirm only the school-name column and the
# columns after it remain.
students_data3.head()

Unnamed: 0,Name Of The School.,Sub-County,County,Gender,Attitude,Availability of learning materials
0,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.2
1,Kaudha_Mixed,Wagai,Siaya,Female,3.4,2.8
2,Kaudha_Mixed,Wagai,Siaya,Male,4.0,3.2
3,Kaudha_Mixed,Wagai,Siaya,Male,4.2,2.6
4,Kaudha_Mixed,Wagai,Siaya,Female,2.2,2.4


In [110]:
# Save the trimmed students frame (school-name column onwards) to CSV;
# index=False leaves the row index out of the file.
students_data3.to_csv("students_data3.csv", index=False)

In [111]:
# Preview the KCSE frame, including the derived GRADE_POINTS column, before
# saving it.
kcse.head()

Unnamed: 0,SCHOOL,GENDER,EXAM_YEAR,SUB-COUNTY,COUNTY,GRADE,GRADE_POINTS
0,Malele_Mixed,MALE,2024,Wagai,siaya,B+,10.0
1,Malele_Mixed,MALE,2024,Wagai,siaya,A,12.0
2,Malele_Mixed,MALE,2024,Wagai,siaya,C-,5.0
3,Malele_Mixed,MALE,2024,Wagai,siaya,B-,8.0
4,Malele_Mixed,FEMALE,2024,Wagai,siaya,D+,4.0


#### save processed kcse data

In [113]:
# Save the processed KCSE data to CSV; index=False leaves the row index out
# of the file.
kcse.to_csv("kcse2.csv",index=False)