# NumPy Problems

## 1. Basic Array Operations

In [164]:
import numpy as np
import pandas as pd
# Load the dataset from a CSV file expected in the current working directory.
csv_path = 'auto-mpg.csv'
data = pd.read_csv(csv_path)

In [None]:
# Pull the mpg column into a standalone NumPy array (np.array makes a copy).
mpg_Array = np.array(data['mpg'])

# Central tendency and spread of mpg; np.std is used with its defaults.
mpg_mean = mpg_Array.mean()
mpg_median = np.median(mpg_Array)
mpg_std = np.std(mpg_Array)
print("Mean : ", mpg_mean)
print("Median : ", mpg_median)
print("Std_deviation : ", mpg_std)

# Summing a boolean mask counts its True entries.
is_above_25 = mpg_Array > 25
Num_cars = np.sum(is_above_25)
print("The number of cars with mpg greater than 25 : ", Num_cars)

Mean :  23.514572864321607
Median :  23.0
Std_deviation :  7.806159061274433
The number of cars with mpg greater than 25 :  158


## 2. Filtering

In [None]:
# Parallel arrays: car_Array[i] and cylinder_Array[i] describe the same row.
car_Array = np.array(data["car name"])
cylinder_Array = np.array(data["cylinders"])

# Keep only the names of cars with more than six cylinders.
has_over_six_cylinders = cylinder_Array > 6
car_name = car_Array[has_over_six_cylinders]

# Convert once and reuse the resulting Python list for both prints.
car_name_list = car_name.tolist()
print(car_name_list)
print(type(car_name_list))

['chevrolet chevelle malibu', 'buick skylark 320', 'plymouth satellite', 'amc rebel sst', 'ford torino', 'ford galaxie 500', 'chevrolet impala', 'plymouth fury iii', 'pontiac catalina', 'amc ambassador dpl', 'dodge challenger se', "plymouth 'cuda 340", 'chevrolet monte carlo', 'buick estate wagon (sw)', 'ford f250', 'chevy c20', 'dodge d200', 'hi 1200d', 'chevrolet impala', 'pontiac catalina brougham', 'ford galaxie 500', 'plymouth fury iii', 'dodge monaco (sw)', 'ford country squire (sw)', 'pontiac safari (sw)', 'chevrolet impala', 'pontiac catalina', 'plymouth fury iii', 'ford galaxie 500', 'amc ambassador sst', 'mercury marquis', 'buick lesabre custom', 'oldsmobile delta 88 royale', 'chrysler newport royal', 'amc matador (sw)', 'chevrolet chevelle concours (sw)', 'ford gran torino (sw)', 'plymouth satellite custom (sw)', 'buick century 350', 'amc matador', 'chevrolet malibu', 'ford gran torino', 'dodge coronet custom', 'mercury marquis brougham', 'chevrolet caprice classic', 'ford l

## 3. Statistical Analysis


In [None]:
# Quartiles of vehicle weight, requested in one np.percentile call.
weight_Array = np.array(data["weight"])
quartile_levels = [25, 50, 75]
percentiles = np.percentile(weight_Array, quartile_levels)

# percentiles is ordered like quartile_levels, so unpack positionally.
weight_p25, weight_p50, weight_p75 = percentiles
print("25th percentile of weight:", weight_p25)
print("50th percentile (median) of weight:", weight_p50)
print("75th percentile of weight:", weight_p75)

25th percentile of weight: 2223.75
50th percentile (median) of weight: 2803.5
75th percentile of weight: 3608.0


## 4. Array Manipulation


In [None]:
# Min-max scaling: (x - min) / (max - min) maps the minimum to 0 and the maximum to 1.
acceleration_Array = np.array(data['acceleration'])
min_value = np.min(acceleration_Array)
max_value = np.max(acceleration_Array)

value_range = max_value - min_value
shifted_acceleration = acceleration_Array - min_value
normalised_value = shifted_acceleration / value_range

# Show only the first ten scaled values.
print(normalised_value[:10])

[0.23809524 0.20833333 0.17857143 0.23809524 0.14880952 0.11904762
 0.05952381 0.0297619  0.11904762 0.0297619 ]


## 5. Broadcasting

In [None]:
# Parse horsepower as floats; entries that cannot be parsed become NaN
# (errors='coerce').
horsepower_raw = pd.to_numeric(data['horsepower'], errors='coerce').to_numpy(dtype=float)

# Mean over the valid entries only (NaNs are ignored by nanmean).
mean_horsepower = np.nanmean(horsepower_raw)

# Build the imputed array with np.where instead of assigning in place:
# the array returned by to_numpy() can be read-only (pandas Copy-on-Write)
# or can share memory with the DataFrame column, so writing into it would
# either raise or silently modify `data`.
horsepower_Array = np.where(np.isnan(horsepower_raw), mean_horsepower, horsepower_raw)

# Scalar broadcasting: every element is scaled by 1.10 (a 10% increase).
updated_horsepower = horsepower_Array * 1.10
print(updated_horsepower[:10])

[143.  181.5 165.  165.  154.  217.8 242.  236.5 247.5 209. ]


## 6. Boolean Indexing

In [None]:
# Parallel arrays holding each car's origin code and engine displacement.
origin_Array = np.array(data['origin'])
displacement_Array = np.array(data['displacement'])

# Boolean mask for rows whose origin code equals 2.
# NOTE(review): the meaning of code 2 is not shown in this notebook; confirm against the dataset description.
is_origin_2 = origin_Array == 2
displacement = displacement_Array[is_origin_2]

average_displacement = np.mean(displacement)
print(average_displacement)


109.14285714285714


## 7. Matrix Operations

In [None]:
# Combine mpg, horsepower and weight (arrays built in earlier cells) as the
# three columns of one 2-D array, one row per car.
Array_2D = np.column_stack((mpg_Array, horsepower_Array, weight_Array))

# One weight per column, in the same order as the stacked columns:
# mpg * 1 + horsepower * 0.5 + weight * -0.2.
vector = np.array([1, 0.5, -0.2])

# Matrix-vector product: yields one weighted sum per row of Array_2D.
dot_product = np.dot(Array_2D, vector)
print(dot_product[:10])

[-617.8 -641.1 -594.2 -595.6 -602.8 -754.2 -746.8 -740.9 -758.5 -660. ]


## 8. Sorting

In [None]:
# Order car names from newest to oldest model year.
modelYear_Array = np.array(data['model year'])

# Sorting the negated years ascending gives indices in descending-year order.
# The relative order of cars sharing a model year is whatever argsort's
# default sort kind produces.
negated_years = -modelYear_Array
descended_modelYear = np.argsort(negated_years)

cars_newest_first = car_Array[descended_modelYear]
print(cars_newest_first[:5])

['chevy s-10' 'ford ranger' 'dodge rampage' 'vw pickup' 'ford mustang gl']


## 9. Correlation

In [None]:
# np.corrcoef on two 1-D arrays returns a 2x2 correlation matrix; the
# off-diagonal entry [0, 1] is the correlation between mpg and weight
# (arrays built in earlier cells).
correlation_matrix = np.corrcoef(mpg_Array, weight_Array)
Pearson_correlation_coefficient = correlation_matrix[0, 1]
print(Pearson_correlation_coefficient)

-0.8317409332443352


## 10. Conditional Aggregates

In [None]:
# Distinct cylinder counts present in the data (np.unique returns them sorted).
cylinder_unique = np.unique(cylinder_Array)
print(cylinder_unique)

# For each cylinder count, average the mpg of the cars in that group.
# mpg_Array and cylinder_Array are parallel arrays built in earlier cells.
for group in cylinder_unique:
    mean_mpg = np.mean(mpg_Array[cylinder_Array == group])
    print(f"mean mpg of cars with cylinder {group} : ", round(mean_mpg, 2))


[3 4 5 6 8]
mean mpg of cars with cylinder 3 :  20.55
mean mpg of cars with cylinder 4 :  29.29
mean mpg of cars with cylinder 5 :  27.37
mean mpg of cars with cylinder 6 :  19.99
mean mpg of cars with cylinder 8 :  14.96


# Pandas Problems

## 1. Basic Exploration

In [None]:
import pandas as pd
# Preview the first ten rows, then report the frame's (rows, columns) shape.
first_ten_rows = data.iloc[:10]
print(first_ten_rows)

frame_shape = data.shape
print(frame_shape)


    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0    3504          12.0   
1  15.0          8         350.0       165.0    3693          11.5   
2  18.0          8         318.0       150.0    3436          11.0   
3  16.0          8         304.0       150.0    3433          12.0   
4  17.0          8         302.0       140.0    3449          10.5   
5  15.0          8         429.0       198.0    4341          10.0   
6  14.0          8         454.0       220.0    4354           9.0   
7  14.0          8         440.0       215.0    4312           8.5   
8  14.0          8         455.0       225.0    4425          10.0   
9  15.0          8         390.0       190.0    3850           8.5   

   model year  origin                   car name  performance_score  \
0          70       1  chevrolet chevelle malibu           0.061644   
1          70       1          buick skylark 320           0.046710   
2          70   

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per numeric column.
summary_statistics = data.describe()
print(summary_statistics)

              mpg   cylinders  displacement  horsepower       weight  \
count  398.000000  398.000000    398.000000  398.000000   398.000000   
mean    23.514573    5.454774    193.425879  104.304020  2970.424623   
std      7.815984    1.701004    104.269838   38.222625   846.841774   
min      9.000000    3.000000     68.000000   46.000000  1613.000000   
25%     17.500000    4.000000    104.250000   76.000000  2223.750000   
50%     23.000000    4.000000    148.500000   93.500000  2803.500000   
75%     29.000000    8.000000    262.000000  125.000000  3608.000000   
max     46.600000    8.000000    455.000000  230.000000  5140.000000   

       acceleration  model year      origin  performance_score  \
count    398.000000  398.000000  398.000000         398.000000   
mean      15.568090   76.010050    1.572864           0.148889   
std        2.757689    3.697627    0.802055           0.094161   
min        8.000000   70.000000    1.000000           0.026117   
25%       13.825000  

## 2. Filtering and Indexing

In [None]:
# Rows for model year 75 whose weight is strictly below 3000.
is_model_year_75 = data['model year'] == 75
is_under_3000 = data['weight'] < 3000
condition_data = data[is_model_year_75 & is_under_3000]

# Show only the identifying and relevant columns for the matching cars.
columns_to_show = ['car name', 'weight', 'mpg']
print(condition_data[columns_to_show])

              car name  weight   mpg
167     toyota corolla    2171  29.0
168         ford pinto    2639  23.0
169        amc gremlin    2914  20.0
170      pontiac astro    2592  23.0
171      toyota corona    2702  24.0
172  volkswagen dasher    2223  25.0
173         datsun 710    2545  24.0
174         ford pinto    2984  18.0
175  volkswagen rabbit    1937  29.0
177         audi 100ls    2694  23.0
178        peugeot 504    2957  23.0
179        volvo 244dl    2945  22.0
180          saab 99le    2671  25.0
181   honda civic cvcc    1795  33.0


## 3. Handling Missing Data

In [None]:
# Convert horsepower to numeric BEFORE counting missing values: entries that
# cannot be parsed only become NaN once coerced, so counting nulls first
# would not include them.
data['horsepower'] = pd.to_numeric(data['horsepower'], errors='coerce')
print(data.isnull().sum())

# Impute missing horsepower with the column median (median() skips NaN).
median_horsepower = data['horsepower'].median()
data['horsepower'] = data['horsepower'].fillna(median_horsepower)
print(data['horsepower'].head(10))


mpg                       0
cylinders                 0
displacement              0
horsepower                0
weight                    0
acceleration              0
model year                0
origin                    0
car name                  0
performance_score         0
 power_to_weight_ratio    0
dtype: int64
0    130.0
1    165.0
2    150.0
3    150.0
4    140.0
5    198.0
6    220.0
7    215.0
8    225.0
9    190.0
Name: horsepower, dtype: float64


## 4. Data Transformation

In [None]:
# Derived feature: horsepower per unit of vehicle weight.
# The column key has no surrounding whitespace, so it can be selected later
# as data['power_to_weight_ratio'].
data['power_to_weight_ratio'] = data['horsepower'] / data['weight']
print(data.head(5))

    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0    3504          12.0   
1  15.0          8         350.0       165.0    3693          11.5   
2  18.0          8         318.0       150.0    3436          11.0   
3  16.0          8         304.0       150.0    3433          12.0   
4  17.0          8         302.0       140.0    3449          10.5   

   model year  origin                   car name  performance_score  \
0          70       1  chevrolet chevelle malibu           0.061644   
1          70       1          buick skylark 320           0.046710   
2          70       1         plymouth satellite           0.057625   
3          70       1              amc rebel sst           0.055928   
4          70       1                ford torino           0.051754   

    power_to_weight_ratio  
0                0.037100  
1                0.044679  
2                0.043655  
3                0.043694  
4           

## 5. Group By

In [None]:
# Average mpg for each distinct value of the origin column.
groupby_origin = data.groupby('origin')
mpg_by_origin = groupby_origin['mpg']
mpg_mean = mpg_by_origin.mean()
print(mpg_mean)

origin
1    20.083534
2    27.891429
3    30.450633
Name: mpg, dtype: float64


## 6. Sorting

In [None]:
# Rank all cars by fuel economy, highest mpg first (returns a new frame).
data_sorted = data.sort_values(by='mpg', ascending=False)

# Names of the ten most fuel-efficient cars.
top_ten_names = data_sorted['car name'].head(10)
print(top_ten_names)

322                          mazda glc
329                honda civic 1500 gl
325               vw rabbit c (diesel)
394                          vw pickup
326                 vw dasher (diesel)
244    volkswagen rabbit custom diesel
309                          vw rabbit
330               renault lecar deluxe
324                         datsun 210
247                     datsun b210 gx
Name: car name, dtype: object


## 7. Apply Function

In [None]:
def performance_score(row):
    """Return the performance score for a single car record.

    The score is ``mpg * acceleration / weight``.

    Args:
        row: Any mapping-like object supporting ``row['mpg']``,
            ``row['acceleration']`` and ``row['weight']`` lookups
            (for example a DataFrame row passed by ``DataFrame.apply``).

    Returns:
        The product of mpg and acceleration divided by weight.
    """
    mpg_times_acceleration = row['mpg'] * row['acceleration']
    return mpg_times_acceleration / row['weight']

# Evaluate performance_score once per row (axis='columns' is the same as axis=1)
# and store the result as a new column on the frame.
data['performance_score'] = data.apply(performance_score, axis='columns')

# head() defaults to the first five rows.
print(data.head())


    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0    3504          12.0   
1  15.0          8         350.0       165.0    3693          11.5   
2  18.0          8         318.0       150.0    3436          11.0   
3  16.0          8         304.0       150.0    3433          12.0   
4  17.0          8         302.0       140.0    3449          10.5   

   model year  origin                   car name  performance_score  \
0          70       1  chevrolet chevelle malibu           0.061644   
1          70       1          buick skylark 320           0.046710   
2          70       1         plymouth satellite           0.057625   
3          70       1              amc rebel sst           0.055928   
4          70       1                ford torino           0.051754   

    power_to_weight_ratio  
0                0.037100  
1                0.044679  
2                0.043655  
3                0.043694  
4           

## 8. Visualization Preparation

In [None]:
# Per-model-year averages of the columns selected for plotting.
columns_to_average = ['mpg', 'weight', 'horsepower']
modelyear_groupby = data.groupby('model year')
average_mpg_weight_horsepower = modelyear_groupby[columns_to_average].mean()
print(average_mpg_weight_horsepower)

                  mpg       weight  horsepower
model year                                    
70          17.689655  3372.793103  147.827586
71          21.250000  2995.428571  106.553571
72          18.714286  3237.714286  120.178571
73          17.100000  3419.025000  130.475000
74          22.703704  2877.925926   94.203704
75          20.266667  3176.800000  101.066667
76          21.573529  3078.735294  101.117647
77          23.375000  2997.357143  105.071429
78          24.061111  2861.805556   99.694444
79          25.093103  3055.344828  101.206897
80          33.696552  2436.655172   78.586207
81          30.334483  2522.931034   81.465517
82          31.709677  2453.548387   81.854839


## 9. Exporting Data

In [None]:
# Select the cars with mpg strictly above 30, keeping only the exported columns.
export_columns = ['mpg', 'cylinders', 'horsepower', 'weight']
is_high_mpg = data['mpg'] > 30
high_mpg_cars = data.loc[is_high_mpg, export_columns]

# Write the subset to CSV without the row index, then preview it.
high_mpg_cars.to_csv('high_mpg_cars.csv', index=False)
print(high_mpg_cars.head())

      mpg  cylinders  horsepower  weight
53   31.0          4        65.0    1773
54   35.0          4        69.0    1613
129  31.0          4        67.0    1950
131  32.0          4        65.0    1836
144  31.0          4        52.0    1649


## 10. Finding Anomalies

In [None]:
# Interquartile range of mpg: distance between the 25th and 75th percentiles.
mpg_column = data['mpg']
Q1 = mpg_column.quantile(0.25)
Q3 = mpg_column.quantile(0.75)
IQR = Q3 - Q1
print("IQR :", IQR)

# Values more than 1.5 * IQR outside the quartiles are treated as outliers.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = (mpg_column < lower_fence) | (mpg_column > upper_fence)

outliers = data[is_outlier]
outliers_data = outliers[['car name', 'mpg', 'model year']]
print(outliers_data)


IQR : 11.5
      car name   mpg  model year
322  mazda glc  46.6          80
