# 🧱 Correcting Structural Errors

This notebook demonstrates how to fix structural issues in tabular data, such as inconsistently formatted column names, leftover index columns, header rows stored as data, and wide (untidy) layouts.

In [1]:
import pandas as pd

## 🧹 Example 1: Clean Column Names

In [2]:
# Two of the raw headers differ only by trailing whitespace, so they collapse
# to the same name once normalized.
df1 = pd.DataFrame({
    ' Student Name ': ['Alice', 'Bob'],
    'Test Score': [85, 92],
    'Test Score ': [85, 92]
})

# Clean and unify column names: trim surrounding whitespace, lowercase, and
# turn spaces into underscores. regex=False makes the literal-space replacement
# explicit regardless of the pandas version's default.
normalized_names = df1.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)

# Assigning the normalized names directly would leave two columns both called
# 'test_score', so df1['test_score'] would return a DataFrame instead of a
# Series. Suffix repeats with '.1', '.2', ... (the convention pandas.read_csv
# uses for duplicate headers) so every label stays unique.
occurrences = {}
unique_names = []
for name in normalized_names:
    repeat = occurrences.get(name, 0)
    unique_names.append(name if repeat == 0 else f'{name}.{repeat}')
    occurrences[name] = repeat + 1
df1.columns = unique_names
df1

Unnamed: 0,student_name,test_score,test_score.1
0,Alice,85,85
1,Bob,92,92


## 🗂️ Example 2: Remove Extra Index Column from CSV

In [3]:
# Build a frame that carries a stray 'Unnamed: 0' column alongside the real data.
records = {
    'Unnamed: 0': [0, 1, 2],
    'name': ['Alice', 'Bob', 'Carol'],
    'score': [85, 88, 91],
}
df2 = pd.DataFrame(records)

# Flag every column whose label begins with 'Unnamed', then keep only the rest.
is_unnamed = df2.columns.str.contains('^Unnamed')
df2 = df2.loc[:, ~is_unnamed]
df2

Unnamed: 0,name,score
0,Alice,85
1,Bob,88
2,Carol,91


## 🧱 Example 3: Promote First Row to Column Headers

In [4]:
# The real header arrived as the first data row; columns are labelled 0 and 1.
df3 = pd.DataFrame([
    ['Name', 'Score'],
    ['Alice', 85],
    ['Bob', 90]
])
# Promote first row to header.
# .tolist() passes plain labels: assigning the row Series itself would carry
# its name (the row label 0) over as the name of the columns index.
df3.columns = df3.iloc[0].tolist()
# Drop the promoted row, renumber the index from 0, and re-infer dtypes: the
# header string shared a column with the scores and forced it to object dtype,
# so infer_objects() lets pandas recover the integer column.
df3 = df3.iloc[1:].reset_index(drop=True).infer_objects()
df3

Unnamed: 0,Name,Score
0,Alice,85
1,Bob,90


## 🔄 Example 4: Reshape Wide Table to Long Format

In [5]:
# Wide layout: one row per student, one column per measurement.
wide_columns = {
    'student': ['Alice', 'Bob'],
    'math_2023': [90, 88],
    'math_2024': [92, 85],
}
df4 = pd.DataFrame(wide_columns)

# Unpivot to long format: one row per (student, original column) pair, with the
# former column label in 'subject_year' and its cell value in 'score'.
df4_long = df4.melt(
    id_vars='student',
    var_name='subject_year',
    value_name='score',
)
df4_long

Unnamed: 0,student,subject_year,score
0,Alice,math_2023,90
1,Bob,math_2023,88
2,Alice,math_2024,92
3,Bob,math_2024,85
