**Numpy problems**


# 1. Basic Array Operations


In [4]:
import os

import numpy as np
import pandas as pd

# Load the dataset.
# The CSV lives at a machine-specific location, so the AUTO_MPG_CSV environment
# variable may be set to point somewhere else; when it is unset the original
# path is used unchanged.
url = os.environ.get('AUTO_MPG_CSV', r'C:\Users\grace\Downloads\auto-mpg.csv')
df = pd.read_csv(url)

# Show first few rows
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [None]:
mpg_array = df['mpg'].values

# (label, value) pairs, printed in order. The standard deviation uses NumPy's
# default ddof=0, which is why it differs slightly from pandas' describe().
mpg_stats = [
    ("Mean MPG:", mpg_array.mean()),
    ("Median MPG:", np.median(mpg_array)),
    ("Standard Deviation of MPG:", mpg_array.std()),
    ("Number of cars with MPG > 25:", (mpg_array > 25).sum()),
]
for label, value in mpg_stats:
    print(label, value)

Mean MPG: 23.514572864321607
Median MPG: 23.0
Standard Deviation of MPG: 7.806159061274433
Number of cars with MPG > 25: 158


In [None]:
# List the column labels of the loaded frame
column_index = df.columns
print(column_index)

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name'],
      dtype='object')



# 2. Filtering Cars with More Than 6 Cylinders

In [None]:
# Keep only the rows whose cylinder count exceeds six
many_cylinders = df['cylinders'].gt(6)
filtered = df.loc[many_cylinders]
car_names = filtered['car name'].tolist()
car_names[:5]  # preview the first five names

['chevrolet chevelle malibu',
 'buick skylark 320',
 'plymouth satellite',
 'amc rebel sst',
 'ford torino']


# 3. Statistical Analysis

In [5]:
weight = df['weight'].values

# Percentile rank -> label printed in front of the value
percentile_labels = {
    25: "25th percentile:",
    50: "50th percentile (median):",
    75: "75th percentile:",
}
for q, label in percentile_labels.items():
    print(label, np.percentile(weight, q))

25th percentile: 2223.75
50th percentile (median): 2803.5
75th percentile: 3608.0



# 4. Array Manipulation

In [None]:
acc = df['acceleration'].to_numpy()

# Min-max scaling: the smallest value maps to 0 and the largest to 1
acc_low, acc_high = acc.min(), acc.max()
acc_norm = (acc - acc_low) / (acc_high - acc_low)
print(acc_norm[:5])

[0.23809524 0.20833333 0.17857143 0.23809524 0.14880952]



# 5. Broadcasting

In [None]:
# Coerce the column to numbers in place; entries that cannot be parsed become NaN
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')

# Replace the NaN entries with the mean of the remaining values
hp_raw = df['horsepower'].to_numpy()
hp_missing = np.isnan(hp_raw)
hp = np.where(hp_missing, np.nanmean(hp_raw), hp_raw)

# Broadcasting: the scalar factor is applied to every element
hp_new = hp * 1.1
print(hp_new[:5])

[143.  181.5 165.  165.  154. ]



# 6. Boolean Indexing

In [None]:
displacement = df['displacement'].values
origin = df['origin'].values

# Origin code 2 is the one reported as "European" in the message below
origin_is_2 = origin == 2
avg_disp = displacement[origin_is_2].mean()
print(f"Average displacement of European cars: {avg_disp:.2f}")


Average displacement of European cars: 109.14



# 7. Matrix Operations

In [None]:
# Build a float matrix from three columns, dropping rows with any missing value
numeric_cols = df[['mpg', 'horsepower', 'weight']]
complete_rows = numeric_cols.replace('?', np.nan).dropna()
matrix = complete_rows.astype(float).values

# One weight per column: mpg, horsepower, weight
vector = np.array([1, 0.5, -0.2])
dot_product = np.dot(matrix, vector)
dot_product[:5]

array([-617.8, -641.1, -594.2, -595.6, -602.8])

 
# 8. Sorting

In [None]:
# Order the rows by model year, highest first, and show the leading car names
sorted_df = df.sort_values('model year', ascending=False)
sorted_names = sorted_df['car name']
sorted_names.head()

397         chevy s-10
396        ford ranger
395      dodge rampage
394          vw pickup
393    ford mustang gl
Name: car name, dtype: object


# 9. Correlation

In [None]:
mpg = df['mpg'].values
weight = df['weight'].values

# np.corrcoef returns a 2x2 matrix; the off-diagonal entry is the mpg/weight correlation
corr_matrix = np.corrcoef(mpg, weight)
corr = corr_matrix[0, 1]
corr

np.float64(-0.8317409332443352)

# 10. Conditional Aggregates


In [None]:
# Mean mpg within each cylinder-count group
mpg_by_cylinders = df.groupby('cylinders')['mpg']
grouped = mpg_by_cylinders.mean()
grouped

cylinders
3    20.550000
4    29.286765
5    27.366667
6    19.985714
8    14.963107
Name: mpg, dtype: float64

**Pandas problems**

# 1. Basic Exploration

In [None]:
import pandas as pd

# Preview the top ten rows of the frame
df.head(n=10)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,1,ford torino
5,15.0,8,429.0,198.0,4341,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220.0,4354,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215.0,4312,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225.0,4425,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190.0,3850,8.5,70,1,amc ambassador dpl


In [None]:
# (rows, columns) of the frame, followed by describe()'s summary statistics
frame_shape = df.shape
print("Shape:", frame_shape)

describe_table = df.describe()
print(describe_table)

Shape: (398, 9)
              mpg   cylinders  displacement  horsepower       weight  \
count  398.000000  398.000000    398.000000  392.000000   398.000000   
mean    23.514573    5.454774    193.425879  104.469388  2970.424623   
std      7.815984    1.701004    104.269838   38.491160   846.841774   
min      9.000000    3.000000     68.000000   46.000000  1613.000000   
25%     17.500000    4.000000    104.250000   75.000000  2223.750000   
50%     23.000000    4.000000    148.500000   93.500000  2803.500000   
75%     29.000000    8.000000    262.000000  126.000000  3608.000000   
max     46.600000    8.000000    455.000000  230.000000  5140.000000   

       acceleration  model year      origin  
count    398.000000  398.000000  398.000000  
mean      15.568090   76.010050    1.572864  
std        2.757689    3.697627    0.802055  
min        8.000000   70.000000    1.000000  
25%       13.825000   73.000000    1.000000  
50%       15.500000   76.000000    1.000000  
75%       17.

# 2. Filtering and Indexing

In [None]:
# Rows with model year 75 and weight under 3000, reduced to three columns
is_year_75 = df['model year'].eq(75)
is_under_3000 = df['weight'].lt(3000)
filtered = df.loc[is_year_75 & is_under_3000]
filtered = filtered[['car name', 'weight', 'mpg']]
print(filtered)


              car name  weight   mpg
167     toyota corolla    2171  29.0
168         ford pinto    2639  23.0
169        amc gremlin    2914  20.0
170      pontiac astro    2592  23.0
171      toyota corona    2702  24.0
172  volkswagen dasher    2223  25.0
173         datsun 710    2545  24.0
174         ford pinto    2984  18.0
175  volkswagen rabbit    1937  29.0
177         audi 100ls    2694  23.0
178        peugeot 504    2957  23.0
179        volvo 244dl    2945  22.0
180          saab 99le    2671  25.0
181   honda civic cvcc    1795  33.0


# 3. Handling Missing Data


In [None]:
import pandas as pd
import numpy as np

# Load the dataset again so this section starts from the raw file.
# Reuses the `url` path defined in the first cell instead of repeating the
# absolute path as a second, differently spelled literal.
df = pd.read_csv(url)

# Replace '?' with NaN and convert horsepower to float
df['horsepower'] = df['horsepower'].replace('?', np.nan).astype(float)

# Show missing values before fill
print("Missing values before filling:")
print(df.isnull().sum())

# Fill missing values with the column median (assigned back, not inplace)
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

# Show missing values after fill
print("\nMissing values after filling:")
print(df.isnull().sum())



Missing values before filling:
mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

Missing values after filling:
mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64


 
# 4. Data Transformation

In [None]:
# New column: horsepower divided by weight, row by row
df['power_to_weight_ratio'] = df['horsepower'].div(df['weight'])

ratio_preview = df[['car name', 'power_to_weight_ratio']].head()
print(ratio_preview)


                    car name  power_to_weight_ratio
0  chevrolet chevelle malibu               0.037100
1          buick skylark 320               0.044679
2         plymouth satellite               0.043655
3              amc rebel sst               0.043694
4                ford torino               0.040591



# 5. Group By

In [None]:
# Mean mpg for each origin code
mpg_by_origin = df.groupby('origin')['mpg']
mean_mpg_by_origin = mpg_by_origin.mean()
print(mean_mpg_by_origin)


origin
1    20.083534
2    27.891429
3    30.450633
Name: mpg, dtype: float64



# 6. Sorting

In [None]:
# Highest mpg first; print the name and mpg of the top ten rows
sorted_df = df.sort_values('mpg', ascending=False)
top_ten = sorted_df[['car name', 'mpg']].head(10)
print(top_ten)


                            car name   mpg
322                        mazda glc  46.6
329              honda civic 1500 gl  44.6
325             vw rabbit c (diesel)  44.3
394                        vw pickup  44.0
326               vw dasher (diesel)  43.4
244  volkswagen rabbit custom diesel  43.1
309                        vw rabbit  41.5
330             renault lecar deluxe  40.9
324                       datsun 210  40.8
247                   datsun b210 gx  39.4



# 7. Apply Function

In [None]:
def performance_score(row):
    """Return mpg times acceleration, divided by weight, for a single row.

    `row` is looked up by the keys 'mpg', 'acceleration' and 'weight'.
    """
    mpg_times_acceleration = row['mpg'] * row['acceleration']
    return mpg_times_acceleration / row['weight']

# axis=1 passes each row to performance_score; the results become a new column
row_scores = df.apply(performance_score, axis=1)
df['performance_score'] = row_scores

score_preview = df[['car name', 'performance_score']].head()
print(score_preview)


                    car name  performance_score
0  chevrolet chevelle malibu           0.061644
1          buick skylark 320           0.046710
2         plymouth satellite           0.057625
3              amc rebel sst           0.055928
4                ford torino           0.051754



# 8. Visualization Preparation

In [None]:
# Per-model-year means of the three columns
yearly_groups = df.groupby('model year')
summary = yearly_groups[['mpg', 'weight', 'horsepower']].mean()
print(summary)



                  mpg       weight  horsepower
model year                                    
70          17.689655  3372.793103  147.827586
71          21.250000  2995.428571  106.553571
72          18.714286  3237.714286  120.178571
73          17.100000  3419.025000  130.475000
74          22.703704  2877.925926   94.203704
75          20.266667  3176.800000  101.066667
76          21.573529  3078.735294  101.117647
77          23.375000  2997.357143  105.071429
78          24.061111  2861.805556   99.694444
79          25.093103  3055.344828  101.206897
80          33.696552  2436.655172   78.586207
81          30.334483  2522.931034   81.465517
82          31.709677  2453.548387   81.854839



# 9. Exporting Data

In [None]:
# Select the rows with mpg above 30, keep four columns, and write them to CSV
# without the index.
# NOTE(review): the destination is an absolute, machine-specific path.
above_30_mpg = df['mpg'] > 30
high_mpg_cars = df[above_30_mpg][['mpg', 'cylinders', 'horsepower', 'weight']]
high_mpg_cars.to_csv(
    r'C:\Users\grace\Desktop\high_mpg_cars.csv',
    index=False,
)


In [None]:
# Load the exported CSV back in to check what was written
high_mpg_preview = pd.read_csv(
    r'C:\Users\grace\Desktop\high_mpg_cars.csv'
)
high_mpg_preview.head()  # first few rows only


Unnamed: 0,mpg,cylinders,horsepower,weight
0,31.0,4,65.0,1773
1,35.0,4,69.0,1613
2,31.0,4,67.0,1950
3,32.0,4,65.0,1836
4,31.0,4,52.0,1649


# 10. Finding Anomalies

In [None]:
# Quartiles of mpg and the interquartile range
Q1, Q3 = df['mpg'].quantile(0.25), df['mpg'].quantile(0.75)
IQR = Q3 - Q1

# A row is an outlier when its mpg lies more than 1.5 * IQR outside the quartiles
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
below_fence = df['mpg'] < lower_fence
above_fence = df['mpg'] > upper_fence
outliers = df[below_fence | above_fence]
print(outliers[['car name', 'mpg', 'model year']])


      car name   mpg  model year
322  mazda glc  46.6          80
