In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import euclidean


In [2]:
data_new = {
    'Alternatif Camera': ['Canon EOS 300D', 'Canon EOS 10D', 'Canon EOS 4000D','Nikon D40',
                            'Nikon D70', 'Sony A3000', 'Pentax K-30','Nikon D60',
                            'Sony A5000', 'Canon EOS 1000D', 'Canon EOS 2000D','Canon EOS 200D', 
                            'Canon EOS 1300D', 'Sony A5100'],
    'Brand Kamera': ['Canon', 'Canon', 'Canon', 'Nikon', 'Nikon', 'Sony', 'Pentax', 'Nikon',
                        'Sony', 'Canon', 'Canon', 'Canon', 'Canon', 'Sony'],
    'Jenis Sensor': ['APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C',
                'APS-C', 'APS-C', 'APS-C', 'APS-C'],
    'Color Depth': ['21', '21', '21.9', '21', '20.4', '23.7', '23.7', '22.5', '23.8', '22', '22.6', '23.6', '22', '23.8'],
    'Dynamic Range': ['10.8', '10.9', '11.4', '11', '10.3', '12.8', '13', '11.4', '13', '10.9', '11.9', '13.4', '11.7', '11.7'],
    'LowLight ISO': ['544', '571', '695', '561', '529', '1068', '1129', '562', '1089', '719',
                 '1009', '1041', '781', '1347']
}

df_new = pd.DataFrame(data_new)

In [3]:
# Remove non-numeric characters and convert to numeric
df_new['Brand Kamera'] = pd.to_numeric(df_new['Brand Kamera'].replace('[\$,]', '', regex=True), errors='coerce')
df_new['Jenis Sensor'] = pd.to_numeric(df_new['Jenis Sensor'].replace('[^\d.]', '', regex=True), errors='coerce')
df_new['Color Depth'] = pd.to_numeric(df_new['Color Depth'], errors='coerce')
df_new['Dynamic Range'] = pd.to_numeric(df_new['Dynamic Range'], errors='coerce')
df_new['LowLight ISO'] = pd.to_numeric(df_new['LowLight ISO'], errors='coerce')


In [4]:
# Extract criteria columns
criteria_columns = ['Brand Kamera', 'Jenis Sensor', 'Color Depth', 'Dynamic Range', 'LowLight ISO']

# ----- Analytic Hierarchy Process (AHP) -----
# Pairwise comparison matrix for criteria
criteria_comparison_matrix = pd.DataFrame(index=criteria_columns, columns=criteria_columns, dtype=float)

# Normalize the criteria comparison matrix
criteria_comparison_matrix /= criteria_comparison_matrix.sum(axis=1)

# Calculate criteria weights
criteria_weights = criteria_comparison_matrix.mean(axis=1)

# Normalize the criteria values
df_normalized_ahp = df_new.copy()
for column in criteria_columns:
    df_normalized_ahp[column] = df_new[column] / df_new[column].max()

# Calculate the weighted sum for each smartphone
df_normalized_ahp['AHP_Score'] = df_normalized_ahp[criteria_columns].dot(criteria_weights)

# Rank the smartphones based on the AHP scores
df_ranked_ahp = df_normalized_ahp.sort_values(by='AHP_Score', ascending=False).reset_index(drop=True)

print("\nRanked Dataset (AHP):")
print(df_ranked_ahp[['Alternatif Camera', 'AHP_Score']])



Ranked Dataset (AHP):
   Alternatif Camera  AHP_Score
0     Canon EOS 300D        NaN
1      Canon EOS 10D        NaN
2    Canon EOS 4000D        NaN
3          Nikon D40        NaN
4          Nikon D70        NaN
5         Sony A3000        NaN
6        Pentax K-30        NaN
7          Nikon D60        NaN
8         Sony A5000        NaN
9    Canon EOS 1000D        NaN
10   Canon EOS 2000D        NaN
11    Canon EOS 200D        NaN
12   Canon EOS 1300D        NaN
13        Sony A5100        NaN


In [5]:
df = pd.read_csv("data-kamera.csv")
df.head()

Unnamed: 0,Nama Kamera,Brand Kamera,Jenis Sensor,Color Depth,Dynamic Range,LowLight ISO,Price,Link
0,Canon EOS 300D,Canon,APS-C,21.0,10.8,544,5500000,https://www.tokopedia.com/itabeshop/canon-eos-...
1,Canon EOS 10D,Canon,APS-C,21.0,10.9,571,1200000,https://www.tokopedia.com/afdlan/camera-canon-...
2,Nikon Coolpix P340,Nikon,"1/1.7""",20.7,11.9,273,2838000,https://shopee.co.id/Nikon-Coolpix-P340-i.2095...
3,Nikon Coolpix P330,Nikon,"1/1.7""",21.0,11.7,213,1150000,https://www.tokopedia.com/kamera2nd/kamera-nik...
4,Panasonic Lumix DMC FX150,Panasonic,"1/1.7""",18.4,9.6,101,310000,https://www.tokopedia.com/sasukeinside/panason...


In [6]:
# Data Cleaning:
# i) Mencari dan Memproses Data yang Hilang
missing_data = df.isnull().sum()
print("Data yang Hilang:\n", missing_data)

# ii) Nilai Data Campuran/Tipe Data yang Tidak Cocok
mixed_data = df.applymap(type).nunique()
print("\nTipe Data Campuran:\n", mixed_data)

# iii) Pencarian Pencilan pada Data
# Menghitung z-score untuk kolom numerik
numeric_cols = df.select_dtypes(include=['int', 'float']).columns
z_scores = (df[numeric_cols] - df[numeric_cols].mean()) / df[numeric_cols].std()

# Menandai baris yang memiliki nilai z-score > 3 atau < -3 sebagai outlier
outliers = df[(z_scores > 3) | (z_scores < -3)]
print("\nData Pencilan:\n", outliers)

Data yang Hilang:
 Nama Kamera      0
Brand Kamera     0
Jenis Sensor     1
Color Depth      0
Dynamic Range    0
LowLight ISO     0
Price            0
Link             0
dtype: int64

Tipe Data Campuran:
 Nama Kamera      1
Brand Kamera     1
Jenis Sensor     2
Color Depth      1
Dynamic Range    1
LowLight ISO     1
Price            1
Link             1
dtype: int64

Data Pencilan:
    Nama Kamera Brand Kamera Jenis Sensor  Color Depth  Dynamic Range  \
0          NaN          NaN          NaN          NaN            NaN   
1          NaN          NaN          NaN          NaN            NaN   
2          NaN          NaN          NaN          NaN            NaN   
3          NaN          NaN          NaN          NaN            NaN   
4          NaN          NaN          NaN          NaN            NaN   
..         ...          ...          ...          ...            ...   
89         NaN          NaN          NaN          NaN            NaN   
90         NaN          NaN         

In [7]:
# Data Transformation
# i) Data Aggregation
# ii) Data Normalization
# iii) Feature Selection/Sampling

# Menghapus tanda koma dari nilai-nilai dalam kolom 'Price'
df['Price'] = df['Price'].str.replace(',', '')

# Mengonversi tipe data kolom 'Price' ke tipe data numerik
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# i) Agregasi Data
data_diagregasi = df.groupby('Nama Kamera').agg({'Price': 'mean', 'Dynamic Range': 'sum'}).reset_index()
print("Data yang Diagregasi:\n", data_diagregasi)

# ii) Normalisasi Data
scaler = MinMaxScaler()
data_dinormalisasi = scaler.fit_transform(df[['Price', 'Dynamic Range']])
df_dinormalisasi = pd.DataFrame(data_dinormalisasi, columns=[ 'Price', 'Dynamic Range'])
print("\nData yang Dinormalisasi:\n", df_dinormalisasi)

# iii) Seleksi Fitur/Pemilihan Sampel
x = df[['Dynamic Range', 'Price']]
y = df['LowLight ISO']
selector = SelectKBest(score_func=f_regression, k=1)
X_terpilih = selector.fit_transform(x, y)
print("\nFitur yang Dipilih:\n", X_terpilih)

Data yang Diagregasi:
           Nama Kamera      Price  Dynamic Range
0     Canon EOS 1000D  2400000.0           10.9
1       Canon EOS 10D  1200000.0           10.9
2     Canon EOS 1100D  3200000.0           11.0
3     Canon EOS 1200D  3300000.0           11.3
4     Canon EOS 1300D  4950000.0           11.7
..                ...        ...            ...
89     Sony Alpha 290  2300000.0           11.5
90        Sony NEX-5T  3300000.0           13.0
91          Sony NEX3  2300000.0           12.0
92  Sony SLT Alpha 33  3100000.0           12.6
93  Sony SLT Alpha 37  3400000.0           12.9

[94 rows x 3 columns]

Data yang Dinormalisasi:
        Price  Dynamic Range
0   0.563518       0.315789
1   0.096634       0.342105
2   0.274484       0.605263
3   0.091205       0.552632
4   0.000000       0.000000
..       ...            ...
89  0.492942       0.868421
90  0.422258       0.473684
91  0.509228       0.736842
92  0.473616       0.552632
93  0.536374       0.921053

[94 rows x 2 c

In [8]:
# Pisahkan variabel independen dan dependen
X = df[['Dynamic Range', 'Price']]
y = df['LowLight ISO']

# Bagi dataset menjadi training set (80%) dan test set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bagi training set menjadi training set (75%) dan validation set (25%)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Print jumlah sampel di setiap set
print("Jumlah sampel dalam setiap set:")
print("Training set:", len(X_train))
print("Validation set:", len(X_val))
print("Test set:", len(X_test))

Jumlah sampel dalam setiap set:
Training set: 56
Validation set: 19
Test set: 19


In [9]:
# Data Transforamtion:
# i) Standardization
scaler_standard = StandardScaler()
X_train_standardized = scaler_standard.fit_transform(X_train)
X_val_standardized = scaler_standard.transform(X_val)
X_test_standardized = scaler_standard.transform(X_test)

# ii) Normalization
scaler_minmax = MinMaxScaler()
X_train_normalized = scaler_minmax.fit_transform(X_train)
X_val_normalized = scaler_minmax.transform(X_val)
X_test_normalized = scaler_minmax.transform(X_test)

# Menampilkan hasil penskalaan
print("Hasil penskalaan:")
print("Standardization - Training set:\n", X_train_standardized)
print("\nStandardization - Validation set:\n", X_val_standardized)
print("\nStandardization - Test set:\n", X_test_standardized)
print("\nNormalization - Training set:\n", X_train_normalized)
print("\nNormalization - Validation set:\n", X_val_normalized)
print("\nNormalization - Test set:\n", X_test_normalized)

Hasil penskalaan:
Standardization - Training set:
 [[-0.35806823 -0.73760697]
 [-0.99463398 -0.14188321]
 [-0.14587965 -0.31404737]
 [-1.31291685 -1.3929031 ]
 [ 0.59678039  0.87084718]
 [ 0.38459181  0.57238958]
 [-0.14587965 -0.97589647]
 [ 2.08210046  1.01977812]
 [-0.14587965 -0.6482484 ]
 [-1.20682256  0.39426817]
 [ 1.55162901  0.51341292]
 [-0.14587965 -0.14188321]
 [ 1.76381759  0.54319911]
 [-0.14587965  1.28785381]
 [-0.7824454  -0.49931746]
 [-0.25197394  0.09640629]
 [-0.57025681 -0.97589647]
 [-0.57025681  0.47320157]
 [-1.31291685 -1.1546136 ]
 [ 1.76381759 -0.49991319]
 [ 1.02115755  1.78468742]
 [ 1.86991188  1.13892287]
 [ 1.44553472  1.5559295 ]
 [-0.14587965 -0.20145558]
 [-1.10072827 -1.33333072]
 [-1.10072827 -0.20145558]
 [-0.7824454  -0.16571216]
 [-0.46416252 -0.55888984]
 [ 1.12725184  0.98998597]
 [ 0.38459181  0.79459454]
 [-0.03978536 -0.02273846]
 [-0.25197394 -0.61846221]
 [ 1.6577233   0.03683392]
 [-1.31291685 -1.33333072]
 [-1.10072827  3.68266331]
 [-0

In [10]:
# Periksa dan bersihkan nama kolom
df_new.columns = df_new.columns.str.strip()

# Verifikasi dan olah kolom 'Price'
if 'Price' in df_new.columns:
    df_new['Price'] = pd.to_numeric(df_new['Price'].replace('[\$,]', '', regex=True), errors='coerce')
else:
    print("Kolom 'Price' tidak ditemukan. Kolom yang tersedia:", df_new.columns)

# Verifikasi dan olah kolom 'Jenis Sensor'
if 'Jenis Sensor' in df_new.columns:
    df_new['Jenis Sensor'] = pd.to_numeric(df_new['Jenis Sensor'].replace('[^\d.]', '', regex=True), errors='coerce')
else:
    print("Kolom 'Jenis Sensor' tidak ditemukan. Kolom yang tersedia:", df_new.columns)

# Verifikasi dan olah kolom 'Color Depth'
if 'Color Depth' in df_new.columns:
    df_new['Color Depth'] = pd.to_numeric(df_new['Color Depth'].replace('[^\d.]', '', regex=True), errors='coerce')
else:
    print("Kolom 'Color Depth' tidak ditemukan. Kolom yang tersedia:", df_new.columns)

# Verifikasi dan olah kolom 'Dynamic Range'
if 'Dynamic Range' in df_new.columns:
    df_new['Dynamic Range'] = pd.to_numeric(df_new['Dynamic Range'].replace('[^\d.]', '', regex=True), errors='coerce')
else:
    print("Kolom 'Dynamic Range' tidak ditemukan. Kolom yang tersedia:", df_new.columns)

# Verifikasi dan olah kolom 'LowLight ISO'
if 'LowLight ISO' in df_new.columns:
    df_new['LowLight ISO'] = pd.to_numeric(df_new['LowLight ISO'].replace('[^\d.]', '', regex=True), errors='coerce')
else:
    print("Kolom 'LowLight ISO' tidak ditemukan. Kolom yang tersedia:", df_new.columns)


Kolom 'Price' tidak ditemukan. Kolom yang tersedia: Index(['Alternatif Camera', 'Brand Kamera', 'Jenis Sensor', 'Color Depth',
       'Dynamic Range', 'LowLight ISO'],
      dtype='object')


In [11]:
# Extract criteria columns
criteria_columns = ['Price', 'Jenis Sensor', 'Color Depth', 'Dynamic Range', 'LowLight ISO']

In [12]:
# ----- Analytic Hierarchy Process (AHP) -----
# Pairwise comparison matrix for criteria
criteria_comparison_matrix = pd.DataFrame(index=criteria_columns, columns=criteria_columns, dtype=float)

# Manually set the pairwise comparison values (you can adjust these values based on your judgment)
# criteria_comparison_matrix.loc['Price'] = ['5,500,000', '4,250,000', '3,650,000', '2,500,000', '4,850,000']
# criteria_comparison_matrix.loc['Jenis Sensor'] = ['APS-C', 'APS-C', 'APS-C', 'APS-C', 'APS-C']
# criteria_comparison_matrix.loc['Color Depth'] = [21, 23.8, 21.9, 22.5, 23.5]
# criteria_comparison_matrix.loc['Dynamic Range'] = [10.8, 13, 11.4, 11.4, 12.9]
# criteria_comparison_matrix.loc['LowLight ISO'] = [544, 1089, 695, 562, 1272]



In [13]:
# Normalize the criteria comparison matrix
criteria_comparison_matrix /= criteria_comparison_matrix.sum(axis=1)

In [14]:
# Calculate criteria weights
criteria_weights = criteria_comparison_matrix.mean(axis=1)

In [15]:
# Normalize the criteria values
df_normalized_ahp = df.copy()
for column in criteria_columns:
    df_normalized_ahp[column] = df[column] / df[column].max()

TypeError: '>=' not supported between instances of 'str' and 'float'

In [None]:
# Calculate the weighted sum for each Camera
df_normalized_ahp['AHP_Score'] = df_normalized_ahp[criteria_columns].dot(criteria_weights)

In [None]:
# Rank the Camera based on the AHP scores
df_ranked_ahp = df_normalized_ahp.sort_values(by='AHP_Score', ascending=False).reset_index(drop=True)

In [None]:
print(df_ranked_ahp)

                  Nama Kamera Brand Kamera  Jenis Sensor  Color Depth  \
0              Canon EOS 300D        Canon           NaN     0.882353   
1               Canon EOS 10D        Canon           NaN     0.882353   
2          Nikon Coolpix P340        Nikon      0.272093     0.869748   
3          Nikon Coolpix P330        Nikon      0.272093     0.882353   
4   Panasonic Lumix DMC FX150    Panasonic      0.272093     0.773109   
..                        ...          ...           ...          ...   
89             Canon EOS M100        Canon           NaN     0.987395   
90              Canon EOS M10        Canon           NaN     0.924370   
91          Olympus PEN E-PL7      Olympus      1.000000     0.953782   
92    Panasonic Lumix DMC-GM1    Panasonic      1.000000     0.936975   
93               Pentax K-500       Pentax           NaN     0.995798   

    Dynamic Range  LowLight ISO     Price  \
0        0.805970      0.403860  0.577731   
1        0.813433      0.423905  

In [None]:
print("\nRanked Dataset (AHP):")
print(df_ranked_ahp[['Alternatif Camera', 'AHP_Score']])
print(df_ranked_ahp.columns)


Ranked Dataset (AHP):


KeyError: "['Alternatif Camera'] not in index"