
# Converting Textual Numeric Columns to Numeric

**Author:** IT24104194 – Kirushayanan K.

This notebook extracts the numeric values embedded in the text columns `Mileage`, `Engine`, `Power`, and `New_Price` of the train and test datasets and stores them in new numeric columns.


In [None]:

import pandas as pd
import numpy as np
import re

# Load both splits; index_col=False stops pandas from using the first CSV
# column as the DataFrame index.
train_df, test_df = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in ('../data/train-data.csv', '../data/test-data.csv')
)

for df in (train_df, test_df):
    # errors='ignore' makes the drop a no-op when the column is absent.
    df.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
    # Remove fully duplicated rows in place (row labels are not reset).
    df.drop_duplicates(inplace=True)

def parse_numeric(value):
    """Return the first number found in *value* as a float.

    *value* is converted with ``str()`` and searched for the first run of
    digits, optionally containing a decimal point (e.g. ``"998 CC"`` ->
    ``998.0``). Signs and thousands separators are not interpreted.

    Returns ``np.nan`` when *value* is null or contains no digits.
    """
    if pd.isnull(value):
        return np.nan

    text = str(value)
    hit = re.search(r"([0-9]*\.?[0-9]+)", text)
    if hit is None:
        return np.nan
    return float(hit.group(1))

def parse_price(value):
    """Return the first number found in *value*, scaled by 100 for "Cr" values.

    *value* is converted with ``str()`` and searched for the first run of
    digits, optionally containing a decimal point. If the text contains
    "cr" in any letter case, the number is multiplied by 100
    (NOTE(review): presumably converting crore to lakh so all prices share
    one unit -- confirm against the dataset).

    Returns ``np.nan`` when *value* is null or contains no digits.
    """
    if pd.isnull(value):
        return np.nan

    text = str(value)
    match = re.search(r"([0-9]*\.?[0-9]+)", text)
    if match is None:
        return np.nan

    num = float(match.group(1))
    # One case-insensitive check covers "Cr", "cr", "CR", ...; a separate
    # test for the literal 'Cr' would be redundant.
    if 'cr' in text.lower():
        return num * 100
    return num

# (source text column, derived numeric column, parser), applied in this order
# so the new columns are appended in the same sequence on both frames.
column_parsers = (
    ('Mileage', 'Mileage_Num', parse_numeric),
    ('Engine', 'Engine_CC', parse_numeric),
    ('Power', 'Power_BHP', parse_numeric),
    ('New_Price', 'New_Price_Num', parse_price),
)

for df in (train_df, test_df):
    for source_col, target_col, parser in column_parsers:
        df[target_col] = df[source_col].apply(parser)

# Preview the original text next to the parsed values (notebook cell output).
train_df[['Mileage','Mileage_Num','Engine','Engine_CC']].head()
